BREAKING(csv): move encoding/csv to own top-level folder and towards single-export files (#3228)

This commit is contained in:
Asher Gomez 2023-03-13 16:56:25 +11:00 committed by GitHub
parent fe01e03b7f
commit a5fffc5fb0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 1323 additions and 1112 deletions

View File

@ -3,7 +3,7 @@
// Copyright 2011 The Go Authors. All rights reserved. BSD license.
// https://github.com/golang/go/blob/master/LICENSE
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
import { assert } from "../../_util/asserts.ts";
import { assert } from "../_util/asserts.ts";
export interface ReadOptions {
/** Character which separates values.

4
csv/mod.ts Normal file
View File

@ -0,0 +1,4 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
export * from "./stringify.ts";
export * from "./parse.ts";
export * from "./stream.ts";

View File

@ -1,17 +1,27 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
import { assert } from "../../_util/asserts.ts";
import {
convertRowToObject,
ERR_BARE_QUOTE,
ERR_FIELD_COUNT,
ERR_INVALID_DELIM,
ERR_QUOTE,
ParseError,
type ReadOptions,
} from "./_io.ts";
import { assert } from "../_util/asserts.ts";
export {
ERR_BARE_QUOTE,
ERR_FIELD_COUNT,
ERR_INVALID_DELIM,
ERR_QUOTE,
ParseError,
ReadOptions,
} from "./_io.ts";
};
const BYTE_ORDER_MARK = "\ufeff";
export class Parser {
class Parser {
#input = "";
#cursor = 0;
#options: {
@ -275,3 +285,92 @@ export class Parser {
return result;
}
}
/** Options understood by `parse`, in addition to those of `ReadOptions`. */
export interface ParseOptions extends ReadOptions {
  /** List of names used for the header definition. */
  columns?: string[];
  /**
   * When `true`, the first line of the input is skipped.
   *
   * - Together with `columns`: the first line is skipped and `columns` is
   *   used as the header definition.
   * - Without `columns`: the first line is skipped and is itself used as the
   *   header definition.
   */
  skipFirstRow?: boolean;
}
/**
 * Parses a CSV string into rows, optionally mapping each row onto a set of
 * header names (either given explicitly or taken from the first row).
 *
 * @example
 * ```ts
 * import { parse } from "https://deno.land/std@$STD_VERSION/csv/parse.ts";
 * const string = "a,b,c\nd,e,f";
 *
 * console.log(
 *   parse(string, {
 *     skipFirstRow: false,
 *   }),
 * );
 * // output:
 * // [["a", "b", "c"], ["d", "e", "f"]]
 * ```
 *
 * @param input Input to parse.
 * @param opt Options of the parser.
 * @returns `string[][]` when neither `opt.skipFirstRow` nor `opt.columns` is
 * provided; `Record<string, unknown>[]` when at least one of them is.
 */
export function parse(
  input: string,
): string[][];
export function parse(
  input: string,
  opt: Omit<ParseOptions, "columns" | "skipFirstRow">,
): string[][];
export function parse(
  input: string,
  opt: Omit<ParseOptions, "columns"> & {
    columns: string[];
  },
): Record<string, unknown>[];
export function parse(
  input: string,
  opt: Omit<ParseOptions, "skipFirstRow"> & {
    skipFirstRow: true;
  },
): Record<string, unknown>[];
export function parse(
  input: string,
  opt: ParseOptions,
): string[][] | Record<string, unknown>[];
export function parse(
  input: string,
  opt: ParseOptions = {
    skipFirstRow: false,
  },
): string[][] | Record<string, unknown>[] {
  const rows = new Parser(opt).parse(input);

  // Plain mode: no header handling requested, hand the rows back untouched.
  const wantsObjects = Boolean(opt.skipFirstRow || opt.columns);
  if (!wantsObjects) {
    return rows;
  }

  let headers: string[] = [];
  // Index offset handed to `convertRowToObject` along with each row; it is 1
  // when the first row was removed from the data.
  let firstLineIndex = 0;

  if (opt.skipFirstRow) {
    // The first row is removed from the data and, unless `columns` overrides
    // it below, doubles as the header definition.
    const firstRow = rows.shift();
    assert(firstRow != null);
    headers = firstRow;
    firstLineIndex = 1;
  }

  // Explicit column names always win over the first row.
  if (opt.columns) {
    headers = opt.columns;
  }

  return rows.map((row, i) =>
    convertRowToObject(row, headers, firstLineIndex + i)
  );
}

View File

@ -4,16 +4,10 @@
// https://github.com/golang/go/blob/master/LICENSE
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
import {
assert,
assertEquals,
assertStringIncludes,
assertThrows,
} from "../testing/asserts.ts";
import { assert, assertEquals, assertThrows } from "../testing/asserts.ts";
import { parse, ParseError } from "./parse.ts";
import type { AssertTrue, Has } from "../testing/types.ts";
import { parse, ParseError, stringify, StringifyError } from "./csv.ts";
const CRLF = "\r\n";
const BYTE_ORDER_MARK = "\ufeff";
Deno.test({
@ -823,541 +817,7 @@ Deno.test({
});
Deno.test({
name: "stringify",
async fn(t) {
await t.step({
name: "Access array index using string",
fn() {
const columns = ["a"];
const data = [["foo"], ["bar"]];
const errorMessage = 'Property accessor is not of type "number"';
assertThrows(
() => stringify(data, { columns }),
StringifyError,
errorMessage,
);
},
});
await t.step(
{
name: "Double quote in separator",
fn() {
const columns = [0];
const data = [["foo"], ["bar"]];
const errorMessage = [
"Separator cannot include the following strings:",
' - U+0022: Quotation mark (")',
" - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)",
].join("\n");
const options = { separator: '"', columns };
assertThrows(
() => stringify(data, options),
StringifyError,
errorMessage,
);
},
},
);
await t.step(
{
name: "CRLF in separator",
fn() {
const columns = [0];
const data = [["foo"], ["bar"]];
const errorMessage = [
"Separator cannot include the following strings:",
' - U+0022: Quotation mark (")',
" - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)",
].join("\n");
const options = { separator: "\r\n", columns };
assertThrows(
() => stringify(data, options),
StringifyError,
errorMessage,
);
},
},
);
await t.step(
{
name: "Invalid data, no columns",
fn() {
const data = [{ a: 1 }, { a: 2 }];
assertThrows(
() => stringify(data),
StringifyError,
"No property accessor function was provided for object",
);
},
},
);
await t.step(
{
name: "Invalid data, no columns",
fn() {
const data = [{ a: 1 }, { a: 2 }];
assertThrows(
() => stringify(data),
StringifyError,
"No property accessor function was provided for object",
);
},
},
);
await t.step(
{
name: "No data, no columns",
fn() {
const columns: string[] = [];
const data: string[][] = [];
const output = CRLF;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "No data, no columns, no headers",
fn() {
const columns: string[] = [];
const data: string[][] = [];
const output = ``;
const options = { headers: false, columns };
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "No data, columns",
fn() {
const columns = ["a"];
const data: string[][] = [];
const output = `a${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "No data, columns, no headers",
fn() {
const columns = ["a"];
const data: string[][] = [];
const output = ``;
const options = { headers: false, columns };
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "Separator: CR",
fn() {
const columns = [0, 1];
const data = [["foo", "bar"], ["baz", "qux"]];
const output = `0\r1${CRLF}foo\rbar${CRLF}baz\rqux${CRLF}`;
const options = { separator: "\r", columns };
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "Separator: LF",
fn() {
const columns = [0, 1];
const data = [["foo", "bar"], ["baz", "qux"]];
const output = `0\n1${CRLF}foo\nbar${CRLF}baz\nqux${CRLF}`;
const options = { separator: "\n", columns };
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "Column: number accessor",
fn() {
const columns = [1];
const data = [{ 1: 1 }, { 1: 2 }];
const output = `1${CRLF}1${CRLF}2${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Explicit header value, no headers",
fn() {
const columns = [{ header: "Value", prop: "value" }];
const data = [{ value: "foo" }, { value: "bar" }];
const output = `foo${CRLF}bar${CRLF}`;
const options = { headers: false, columns };
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "Column: number accessor,const data = array",
fn() {
const columns = [1];
const data = [["key", "foo"], ["key", "bar"]];
const output = `1${CRLF}foo${CRLF}bar${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Column: array number accessor",
fn() {
const columns = [[1]];
const data = [{ 1: 1 }, { 1: 2 }];
const output = `1${CRLF}1${CRLF}2${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Column: array number accessor,const data = array",
fn() {
const columns = [[1]];
const data = [["key", "foo"], ["key", "bar"]];
const output = `1${CRLF}foo${CRLF}bar${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Column: array number accessor,const data = array",
fn() {
const columns = [[1, 1]];
const data = [["key", ["key", "foo"]], ["key", ["key", "bar"]]];
const output = `1${CRLF}foo${CRLF}bar${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Column: string accessor",
fn() {
const columns = ["value"];
const data = [{ value: "foo" }, { value: "bar" }];
const output = `value${CRLF}foo${CRLF}bar${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Column: array string accessor",
fn() {
const columns = [["value"]];
const data = [{ value: "foo" }, { value: "bar" }];
const output = `value${CRLF}foo${CRLF}bar${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Column: array string accessor",
fn() {
const columns = [["msg", "value"]];
const data = [{ msg: { value: "foo" } }, { msg: { value: "bar" } }];
const output = `value${CRLF}foo${CRLF}bar${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Explicit header",
fn() {
const columns = [
{
header: "Value",
prop: ["msg", "value"],
},
];
const data = [{ msg: { value: "foo" } }, { msg: { value: "bar" } }];
const output = `Value${CRLF}foo${CRLF}bar${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: object",
fn() {
const columns = [0];
const data = [[{ value: "foo" }], [{ value: "bar" }]];
const output =
`0${CRLF}"{""value"":""foo""}"${CRLF}"{""value"":""bar""}"${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: arary of objects",
fn() {
const columns = [0];
const data = [
[[{ value: "foo" }, { value: "bar" }]],
[[{ value: "baz" }, { value: "qux" }]],
];
const output =
`0${CRLF}"[{""value"":""foo""},{""value"":""bar""}]"${CRLF}"[{""value"":""baz""},{""value"":""qux""}]"${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: array",
fn() {
const columns = [0];
const data = [[["foo", "bar"]], [["baz", "qux"]]];
const output =
`0${CRLF}"[""foo"",""bar""]"${CRLF}"[""baz"",""qux""]"${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: array, separator: tab",
fn() {
const columns = [0];
const data = [[["foo", "bar"]], [["baz", "qux"]]];
const output =
`0${CRLF}"[""foo"",""bar""]"${CRLF}"[""baz"",""qux""]"${CRLF}`;
const options = { separator: "\t", columns };
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "Targeted value: undefined",
fn() {
const columns = [0];
const data = [[], []];
const output = `0${CRLF}${CRLF}${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: null",
fn() {
const columns = [0];
const data = [[null], [null]];
const output = `0${CRLF}${CRLF}${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: hex number",
fn() {
const columns = [0];
const data = [[0xa], [0xb]];
const output = `0${CRLF}10${CRLF}11${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: BigInt",
fn() {
const columns = [0];
const data = [[BigInt("1")], [BigInt("2")]];
const output = `0${CRLF}1${CRLF}2${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: boolean",
fn() {
const columns = [0];
const data = [[true], [false]];
const output = `0${CRLF}true${CRLF}false${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: string",
fn() {
const columns = [0];
const data = [["foo"], ["bar"]];
const output = `0${CRLF}foo${CRLF}bar${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: symbol",
fn() {
const columns = [0];
const data = [[Symbol("foo")], [Symbol("bar")]];
const output = `0${CRLF}Symbol(foo)${CRLF}Symbol(bar)${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: function",
fn() {
const columns = [0];
const data = [[(n: number) => n]];
const output = `0${CRLF}(n)=>n${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Value with double quote",
fn() {
const columns = [0];
const data = [['foo"']];
const output = `0${CRLF}"foo"""${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Value with CRLF",
fn() {
const columns = [0];
const data = [["foo\r\n"]];
const output = `0${CRLF}"foo\r\n"${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Value with CR",
fn() {
const columns = [0];
const data = [["foo\r"]];
const output = `0${CRLF}foo\r${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Value with LF",
fn() {
const columns = [0];
const data = [["foo\n"]];
const output = `0${CRLF}"foo\n"${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Value with comma",
fn() {
const columns = [0];
const data = [["foo,"]];
const output = `0${CRLF}"foo,"${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Value with comma, tab separator",
fn() {
const columns = [0];
const data = [["foo,"]];
const output = `0${CRLF}foo,${CRLF}`;
const options = { separator: "\t", columns };
assertEquals(stringify(data, options), output);
},
},
);
await t.step({
name: "Valid data, no columns",
fn() {
const data = [[1, 2, 3], [4, 5, 6]];
const output = `${CRLF}1,2,3${CRLF}4,5,6${CRLF}`;
assertEquals(stringify(data), output);
},
});
await t.step(
{
name: "byte-order mark with bom=true",
fn() {
const data = [["abc"]];
const output = `${BYTE_ORDER_MARK}abc${CRLF}`;
const options = { headers: false, bom: true };
assertStringIncludes(stringify(data, options), BYTE_ORDER_MARK);
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "no byte-order mark with omitted bom option",
fn() {
const data = [["abc"]];
const output = `abc${CRLF}`;
const options = { headers: false };
assert(!stringify(data, options).includes(BYTE_ORDER_MARK));
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "no byte-order mark with bom=false",
fn() {
const data = [["abc"]];
const output = `abc${CRLF}`;
const options = { headers: false, bom: false };
assert(!stringify(data, options).includes(BYTE_ORDER_MARK));
assertEquals(stringify(data, options), output);
},
},
);
},
});
Deno.test({
name: "[encoding/csv] correct typing",
name: "[csv] correct typing",
fn() {
{
const parsed = parse("a\nb");

149
csv/stream.ts Normal file
View File

@ -0,0 +1,149 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
import {
convertRowToObject,
defaultReadOptions,
type LineReader,
parseRecord,
type RowType,
} from "../csv/_io.ts";
import { TextDelimiterStream } from "../streams/text_delimiter_stream.ts";
/** Options accepted by the `CsvStream` constructor. */
export interface CsvStreamOptions {
  /**
   * List of names used as headers. When set, rows are emitted as objects
   * instead of arrays.
   */
  columns?: string[];
  /**
   * When `true`, the first record is not emitted; unless `columns` is also
   * given, it is used as the list of headers.
   */
  skipFirstRow?: boolean;
  /**
   * Forwarded to the record parser as part of the options.
   * NOTE(review): presumably the character separating values - confirm
   * against `_io.ts`.
   */
  separator?: string;
  /**
   * Forwarded to the record parser as part of the options.
   * NOTE(review): presumably the marker introducing a comment line - confirm
   * against `_io.ts`.
   */
  comment?: string;
}
/**
 * `LineReader` backed by a stream reader that yields one line of text per
 * chunk.
 */
class StreamLineReader implements LineReader {
  #reader: ReadableStreamDefaultReader<string>;
  /** Set once the underlying reader has reported the end of the stream. */
  #done = false;

  constructor(reader: ReadableStreamDefaultReader<string>) {
    this.#reader = reader;
  }

  /**
   * Reads the next line, or resolves to `null` once the stream is exhausted.
   */
  async readLine(): Promise<string | null> {
    const chunk = await this.#reader.read();
    if (!chunk.done) {
      // NOTE: Remove trailing CR for compatibility with golang's `encoding/csv`
      return stripLastCR(chunk.value);
    }
    this.#done = true;
    return null;
  }

  /** Resolves to `true` after `readLine` has hit the end of the stream. */
  isEOF(): Promise<boolean> {
    return Promise.resolve(this.#done);
  }

  /** Cancels the underlying reader; the returned promise is not awaited. */
  cancel() {
    this.#reader.cancel();
  }
}
/** Returns `s` without its final character when that character is a CR. */
function stripLastCR(s: string): string {
  if (!s.endsWith("\r")) {
    return s;
  }
  return s.slice(0, -1);
}
/**
 * Transform stream that takes CSV text on its writable side and emits parsed
 * rows on its readable side.
 *
 * Incoming text is split into lines on `"\n"`. Rows are emitted as arrays, or
 * as objects when the `skipFirstRow` or `columns` option is set.
 */
export class CsvStream<T extends CsvStreamOptions>
  implements TransformStream<string, RowType<CsvStreamOptions, T>> {
  readonly #readable: ReadableStream<
    string[] | Record<string, string | unknown>
  >;
  readonly #options: CsvStreamOptions;
  readonly #lineReader: StreamLineReader;
  readonly #lines: TextDelimiterStream;
  /** Line counter handed to the record parser and to the row converter. */
  #lineIndex = 0;
  /** `true` until the first record has been parsed. */
  #isFirstRow = true;
  /** Header names used when rows are emitted as objects. */
  #headers: string[] = [];

  /**
   * @param options Stream options; they are merged over `defaultReadOptions`.
   */
  constructor(options: T = defaultReadOptions as T) {
    this.#options = { ...defaultReadOptions, ...options };

    const lines = new TextDelimiterStream("\n");
    this.#lines = lines;
    this.#lineReader = new StreamLineReader(lines.readable.getReader());

    this.#readable = new ReadableStream({
      pull: (controller) => this.#pull(controller),
      cancel: () => this.#lineReader.cancel(),
    });
  }

  /**
   * Produces the next row for the readable side: reads and parses lines until
   * one row has been enqueued or the input is exhausted.
   */
  async #pull(
    controller: ReadableStreamDefaultController<
      string[] | Record<string, string | unknown>
    >,
  ): Promise<void> {
    const line = await this.#lineReader.readLine();

    if (line === null) {
      // Reached to EOF
      controller.close();
      this.#lineReader.cancel();
      return;
    }

    if (line === "") {
      // Found an empty line: count it and move on to the next one.
      this.#lineIndex++;
      return this.#pull(controller);
    }

    const record = await parseRecord(
      line,
      this.#lineReader,
      this.#options,
      this.#lineIndex,
    );
    if (record === null) {
      controller.close();
      this.#lineReader.cancel();
      return;
    }

    const { skipFirstRow, columns } = this.#options;
    const emitObjects = Boolean(skipFirstRow || columns);

    if (this.#isFirstRow) {
      this.#isFirstRow = false;

      if (emitObjects) {
        // Explicit `columns` take precedence; otherwise `skipFirstRow` must be
        // set, in which case the first record supplies the headers.
        this.#headers = columns ? columns : record;
      }

      if (skipFirstRow) {
        // The first record is consumed here and never emitted.
        return this.#pull(controller);
      }
    }

    this.#lineIndex++;

    if (!(record.length > 0)) {
      // Nothing to emit for this record; try the next line.
      return this.#pull(controller);
    }

    controller.enqueue(
      emitObjects
        ? convertRowToObject(record, this.#headers, this.#lineIndex)
        : record,
    );
  }

  /** Readable side: the parsed rows. */
  get readable() {
    return this.#readable as ReadableStream<RowType<CsvStreamOptions, T>>;
  }

  /** Writable side: accepts the CSV text. */
  get writable(): WritableStream<string> {
    return this.#lines.writable;
  }
}

View File

@ -2,23 +2,23 @@
import { CsvStream } from "./stream.ts";
import type { CsvStreamOptions } from "./stream.ts";
import { ERR_QUOTE, ParseError } from "./_io.ts";
import { readableStreamFromIterable } from "../../streams/readable_stream_from_iterable.ts";
import { readableStreamFromReader } from "../../streams/readable_stream_from_reader.ts";
import { readableStreamFromIterable } from "../streams/readable_stream_from_iterable.ts";
import { readableStreamFromReader } from "../streams/readable_stream_from_reader.ts";
import {
assert,
assertEquals,
assertRejects,
assertStringIncludes,
} from "../../testing/asserts.ts";
import type { AssertTrue, Has } from "../../testing/types.ts";
import { fromFileUrl, join } from "../../path/mod.ts";
import { StringReader } from "../../io/string_reader.ts";
} from "../testing/asserts.ts";
import type { AssertTrue, Has } from "../testing/types.ts";
import { fromFileUrl, join } from "../path/mod.ts";
import { StringReader } from "../io/string_reader.ts";
const testdataDir = join(fromFileUrl(import.meta.url), "../../testdata");
const testdataDir = join(fromFileUrl(import.meta.url), "../testdata");
const encoder = new TextEncoder();
Deno.test({
name: "[encoding/csv/stream] CsvStream should work with Deno.File",
name: "[csv/stream] CsvStream should work with Deno.File",
permissions: {
read: [testdataDir],
},
@ -40,7 +40,7 @@ Deno.test({
});
Deno.test({
name: "[encoding/csv/stream] CsvStream with invalid csv",
name: "[csv/stream] CsvStream with invalid csv",
fn: async () => {
const readable = readableStreamFromIterable([
encoder.encode("id,name\n"),
@ -63,7 +63,7 @@ Deno.test({
});
Deno.test({
name: "[encoding/csv/stream] CsvStream with various inputs",
name: "[csv/stream] CsvStream with various inputs",
permissions: "none",
fn: async (t) => {
// These test cases were originally ported from Go:
@ -371,8 +371,7 @@ export const MyTextDecoderStream = () => {
};
Deno.test({
name:
"[encoding/csv/stream] cancel CsvStream during iteration does not leak file",
name: "[csv/stream] cancel CsvStream during iteration does not leak file",
permissions: { read: [testdataDir] },
// TODO(kt3k): Enable this test on windows.
// See https://github.com/denoland/deno_std/issues/3160
@ -388,7 +387,7 @@ Deno.test({
});
Deno.test({
name: "[encoding/csv/stream] correct typing",
name: "[csv/stream] correct typing",
fn() {
{
const { readable } = new CsvStream();

312
csv/stringify.ts Normal file
View File

@ -0,0 +1,312 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
/** A single key used to read a value: a property name or an array index. */
type PropertyAccessor = string | number;
/** A plain object whose values are looked up by string keys. */
type ObjectWithStringPropertyKeys = Record<string, unknown>;
/** Full description of one output column. */
export type ColumnDetails = {
  /** Property accessor(s) used to access the value on the object. */
  prop: PropertyAccessor | PropertyAccessor[];
  /**
   * Explicit column header name. If omitted, the (final) property accessor is
   * used for this value.
   */
  header?: string;
};
/**
 * Describes how the value of one output column is found on each item of the
 * data array. A column can be written in three ways.
 *
 * **1. A single property accessor.** When the value sits at the top level of
 * the item, the column is just its accessor: a `string` (if the item is a
 * plain object) or a `number` (if the item is an array).
 *
 * ```ts
 * const columns = [
 *   "name",
 * ];
 * ```
 *
 * The accessor is also used as the column header:
 *
 * | name |
 * | :--: |
 * | Deno |
 *
 * **2. An array of property accessors.** When the value is nested in other
 * objects/arrays, a single accessor is not enough and the whole path has to
 * be given.
 *
 * ```ts
 * const columns = [
 *   ["repo", "name"],
 *   ["repo", "org"],
 * ];
 * ```
 *
 * The header is the last accessor of each path:
 *
 * | name |   org    |
 * | :--: | :------: |
 * | deno | denoland |
 *
 * **3. A `ColumnDetails` object.** Use this form when a different column
 * header is desired:
 *
 * - **`header?: string`** is the optional value to use for the column header
 *   name
 * - **`prop: PropertyAccessor | PropertyAccessor[]`** is the property accessor
 *   (`string` or `number`) or array of property accessors used to access the
 *   data on each object
 *
 * ```ts
 * const columns = [
 *   "name",
 *   {
 *     prop: ["runsOn", 0],
 *     header: "language 1",
 *   },
 *   {
 *     prop: ["runsOn", 1],
 *     header: "language 2",
 *   },
 * ];
 * ```
 *
 * | name | language 1 | language 2 |
 * | :--: | :--------: | :--------: |
 * | Deno |    Rust    | TypeScript |
 */
export type Column = PropertyAccessor | PropertyAccessor[] | ColumnDetails;
/** One item of the data to stringify: either an array or a plain object. */
export type DataItem = unknown[] | ObjectWithStringPropertyKeys;
/** Output formatting options for `stringify`. */
export type StringifyOptions = {
  /**
   * Instructions for how to target the data for each column of output. An
   * explicit header name for a column can also be provided here.
   */
  columns?: Column[];
  /**
   * Whether the output starts with a row of headers.
   *
   * @default {true}
   */
  headers?: boolean;
  /**
   * Delimiter placed between values, for example:
   * - `","` _comma_
   * - `"\t"` _tab_
   * - `"|"` _pipe_
   *
   * @default {","}
   */
  separator?: string;
  /**
   * Whether to put a
   * [byte-order mark](https://en.wikipedia.org/wiki/Byte_order_mark) at the
   * very beginning of the output. Software such as MS Excel requires it to
   * display Unicode text properly.
   *
   * @default {false}
   */
  bom?: boolean;
};
/** Character used to wrap, and doubled to escape, quoted values. */
const QUOTE = '"';
/** Line feed; a value containing it must be quoted. */
const LF = "\n";
/** Terminator written after every output row. */
const CRLF = "\r\n";
/** Optional prefix of the output, see the `bom` option. */
const BYTE_ORDER_MARK = "\ufeff";

/**
 * Converts a single value into its CSV field text.
 *
 * `undefined` and `null` become the empty string, objects are serialized with
 * `JSON.stringify`, and everything else goes through `String()`. The text is
 * wrapped in quotes (with embedded quotes doubled) when it contains the
 * separator, a line feed or a quote.
 *
 * @param value The value to convert.
 * @param sep The separator in use; its presence in the text forces quoting.
 * @returns The field text, quoted when necessary.
 */
function getEscapedString(value: unknown, sep: string): string {
  if (value == null) {
    return "";
  }

  const text = typeof value === "object"
    ? JSON.stringify(value)
    : String(value);

  const mustQuote = [sep, LF, QUOTE].some((token) => text.includes(token));
  if (!mustQuote) {
    return text;
  }

  // Double every embedded quote, then wrap the whole field in quotes.
  const doubled = text.split(QUOTE).join(QUOTE + QUOTE);
  return QUOTE + doubled + QUOTE;
}
/**
 * A column in canonical form: the header is always present and the property
 * accessors are always given as an array.
 */
type NormalizedColumn = Omit<ColumnDetails, "header" | "prop"> & {
  header: string;
  prop: PropertyAccessor[];
};

/**
 * Brings any of the accepted `Column` shapes into canonical form.
 *
 * @param column A property accessor, an array of them, or a `ColumnDetails`.
 * @returns The accessor path, plus the header: the explicit `header` when it
 * is a string, otherwise the last accessor of the path converted to a string.
 */
function normalizeColumn(column: Column): NormalizedColumn {
  // An array of accessors: the last accessor names the column.
  if (Array.isArray(column)) {
    return { header: String(column[column.length - 1]), prop: column };
  }

  // A lone accessor is both the path and the header.
  if (typeof column !== "object") {
    return { header: String(column), prop: [column] };
  }

  // A `ColumnDetails` object.
  const path = Array.isArray(column.prop) ? column.prop : [column.prop];
  const title = typeof column.header === "string"
    ? column.header
    : String(path[path.length - 1]);
  return { header: title, prop: path };
}
/**
 * Error thrown while stringifying: by `stringify` for a disallowed separator,
 * and by `getValuesFromItem` when an item cannot be read with the given
 * columns.
 */
export class StringifyError extends Error {
// Replaces the inherited `Error` name so instances report "StringifyError".
override readonly name = "StringifyError";
}
/**
 * Collects the values of one output row from a data item.
 *
 * With columns, each column's property accessors are followed in order,
 * starting at the item. With no columns, an array item contributes all of its
 * elements.
 *
 * @param item The data item to read from.
 * @param normalizedColumns The columns in canonical form; may be empty.
 * @returns One value per column, or the elements of `item` when there are no
 * columns.
 * @throws {StringifyError} If an array is accessed with a non-number
 * accessor, or if there are no columns and `item` is not an array.
 */
function getValuesFromItem(
  item: DataItem,
  normalizedColumns: NormalizedColumn[],
): unknown[] {
  if (normalizedColumns.length === 0) {
    if (Array.isArray(item)) {
      return [...item];
    }
    if (typeof item === "object") {
      throw new StringifyError(
        "No property accessor function was provided for object",
      );
    }
    return [item];
  }

  return normalizedColumns.map((column) => {
    let current: unknown = item;
    for (const accessor of column.prop) {
      // Nothing to descend into: the remaining accessors leave the value
      // as it is.
      if (typeof current !== "object" || current === null) continue;

      if (!Array.isArray(current)) {
        current = (current as ObjectWithStringPropertyKeys)[accessor];
        continue;
      }

      // Arrays may only be indexed by number.
      if (typeof accessor !== "number") {
        throw new StringifyError(
          'Property accessor is not of type "number"',
        );
      }
      current = current[accessor];
    }
    return current;
  });
}
/**
 * Converts an array of data items into CSV text. Every row, including the
 * optional header row, is terminated with CRLF.
 *
 * @example
 * ```ts
 * import {
 *   Column,
 *   stringify,
 * } from "https://deno.land/std@$STD_VERSION/csv/stringify.ts";
 *
 * type Character = {
 *   age: number;
 *   name: {
 *     first: string;
 *     last: string;
 *   };
 * };
 *
 * const data: Character[] = [
 *   {
 *     age: 70,
 *     name: {
 *       first: "Rick",
 *       last: "Sanchez",
 *     },
 *   },
 *   {
 *     age: 14,
 *     name: {
 *       first: "Morty",
 *       last: "Smith",
 *     },
 *   },
 * ];
 *
 * let columns: Column[] = [
 *   ["name", "first"],
 *   "age",
 * ];
 *
 * console.log(stringify(data, { columns }));
 * // first,age
 * // Rick,70
 * // Morty,14
 * ```
 *
 * @param data The source data to stringify. It's an array of items which are
 * plain objects or arrays (`DataItem: Record<string, unknown> | unknown[]`),
 * for instance:
 *
 * ```ts
 * const data = [
 *   {
 *     name: "Deno",
 *     repo: { org: "denoland", name: "deno" },
 *     runsOn: ["Rust", "TypeScript"],
 *   },
 * ];
 * ```
 *
 * @param options Output formatting options.
 * @returns The CSV text.
 * @throws {StringifyError} If the separator contains a quotation mark or
 * CRLF, or if an item cannot be read with the given columns.
 */
export function stringify(
  data: DataItem[],
  { headers = true, separator: sep = ",", columns = [], bom = false }:
    StringifyOptions = {},
): string {
  if (sep.includes(QUOTE) || sep.includes(CRLF)) {
    throw new StringifyError(
      [
        "Separator cannot include the following strings:",
        ' - U+0022: Quotation mark (")',
        " - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)",
      ].join("\n"),
    );
  }

  const normalizedColumns = columns.map(normalizeColumn);

  // Escapes each cell, joins the cells with the separator and ends the row.
  const toRow = (cells: unknown[]): string =>
    cells.map((cell) => getEscapedString(cell, sep)).join(sep) + CRLF;

  let output = bom ? BYTE_ORDER_MARK : "";

  if (headers) {
    output += toRow(normalizedColumns.map((column) => column.header));
  }

  for (const item of data) {
    output += toRow(getValuesFromItem(item, normalizedColumns));
  }

  return output;
}

550
csv/stringify_test.ts Normal file
View File

@ -0,0 +1,550 @@
// Test ported from Golang
// https://github.com/golang/go/blob/2cc15b1/src/encoding/csv/reader_test.go
// Copyright 2011 The Go Authors. All rights reserved. BSD license.
// https://github.com/golang/go/blob/master/LICENSE
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
import {
assert,
assertEquals,
assertStringIncludes,
assertThrows,
} from "../testing/asserts.ts";
import { stringify, StringifyError } from "./stringify.ts";
const CRLF = "\r\n";
const BYTE_ORDER_MARK = "\ufeff";
Deno.test({
name: "stringify",
async fn(t) {
await t.step({
name: "Access array index using string",
fn() {
const columns = ["a"];
const data = [["foo"], ["bar"]];
const errorMessage = 'Property accessor is not of type "number"';
assertThrows(
() => stringify(data, { columns }),
StringifyError,
errorMessage,
);
},
});
await t.step(
{
name: "Double quote in separator",
fn() {
const columns = [0];
const data = [["foo"], ["bar"]];
const errorMessage = [
"Separator cannot include the following strings:",
' - U+0022: Quotation mark (")',
" - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)",
].join("\n");
const options = { separator: '"', columns };
assertThrows(
() => stringify(data, options),
StringifyError,
errorMessage,
);
},
},
);
await t.step(
{
name: "CRLF in separator",
fn() {
const columns = [0];
const data = [["foo"], ["bar"]];
const errorMessage = [
"Separator cannot include the following strings:",
' - U+0022: Quotation mark (")',
" - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)",
].join("\n");
const options = { separator: "\r\n", columns };
assertThrows(
() => stringify(data, options),
StringifyError,
errorMessage,
);
},
},
);
await t.step(
{
name: "Invalid data, no columns",
fn() {
const data = [{ a: 1 }, { a: 2 }];
assertThrows(
() => stringify(data),
StringifyError,
"No property accessor function was provided for object",
);
},
},
);
await t.step(
{
name: "Invalid data, no columns",
fn() {
const data = [{ a: 1 }, { a: 2 }];
assertThrows(
() => stringify(data),
StringifyError,
"No property accessor function was provided for object",
);
},
},
);
await t.step(
{
name: "No data, no columns",
fn() {
const columns: string[] = [];
const data: string[][] = [];
const output = CRLF;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "No data, no columns, no headers",
fn() {
const columns: string[] = [];
const data: string[][] = [];
const output = ``;
const options = { headers: false, columns };
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "No data, columns",
fn() {
const columns = ["a"];
const data: string[][] = [];
const output = `a${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "No data, columns, no headers",
fn() {
const columns = ["a"];
const data: string[][] = [];
const output = ``;
const options = { headers: false, columns };
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "Separator: CR",
fn() {
const columns = [0, 1];
const data = [["foo", "bar"], ["baz", "qux"]];
const output = `0\r1${CRLF}foo\rbar${CRLF}baz\rqux${CRLF}`;
const options = { separator: "\r", columns };
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "Separator: LF",
fn() {
const columns = [0, 1];
const data = [["foo", "bar"], ["baz", "qux"]];
const output = `0\n1${CRLF}foo\nbar${CRLF}baz\nqux${CRLF}`;
const options = { separator: "\n", columns };
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "Column: number accessor",
fn() {
const columns = [1];
const data = [{ 1: 1 }, { 1: 2 }];
const output = `1${CRLF}1${CRLF}2${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Explicit header value, no headers",
fn() {
const columns = [{ header: "Value", prop: "value" }];
const data = [{ value: "foo" }, { value: "bar" }];
const output = `foo${CRLF}bar${CRLF}`;
const options = { headers: false, columns };
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "Column: number accessor,const data = array",
fn() {
const columns = [1];
const data = [["key", "foo"], ["key", "bar"]];
const output = `1${CRLF}foo${CRLF}bar${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Column: array number accessor",
fn() {
const columns = [[1]];
const data = [{ 1: 1 }, { 1: 2 }];
const output = `1${CRLF}1${CRLF}2${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Column: array number accessor,const data = array",
fn() {
const columns = [[1]];
const data = [["key", "foo"], ["key", "bar"]];
const output = `1${CRLF}foo${CRLF}bar${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
// Two-element accessor path: `[1, 1]` first selects index 1 of the row, then
// index 1 of the inner array found there. The header is the last accessor.
await t.step(
  {
    name: "Column: nested array number accessor, data = nested array",
    fn() {
      const columns = [[1, 1]];
      const data = [["key", ["key", "foo"]], ["key", ["key", "bar"]]];
      const output = `1${CRLF}foo${CRLF}bar${CRLF}`;
      assertEquals(stringify(data, { columns }), output);
    },
  },
);
await t.step(
{
name: "Column: string accessor",
fn() {
const columns = ["value"];
const data = [{ value: "foo" }, { value: "bar" }];
const output = `value${CRLF}foo${CRLF}bar${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Column: array string accessor",
fn() {
const columns = [["value"]];
const data = [{ value: "foo" }, { value: "bar" }];
const output = `value${CRLF}foo${CRLF}bar${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
// Two-element accessor path: `["msg", "value"]` reads `item.msg.value`.
// The header is taken from the last accessor in the path ("value").
await t.step(
  {
    name: "Column: nested array string accessor",
    fn() {
      const columns = [["msg", "value"]];
      const data = [{ msg: { value: "foo" } }, { msg: { value: "bar" } }];
      const output = `value${CRLF}foo${CRLF}bar${CRLF}`;
      assertEquals(stringify(data, { columns }), output);
    },
  },
);
await t.step(
{
name: "Explicit header",
fn() {
const columns = [
{
header: "Value",
prop: ["msg", "value"],
},
];
const data = [{ msg: { value: "foo" } }, { msg: { value: "bar" } }];
const output = `Value${CRLF}foo${CRLF}bar${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: object",
fn() {
const columns = [0];
const data = [[{ value: "foo" }], [{ value: "bar" }]];
const output =
`0${CRLF}"{""value"":""foo""}"${CRLF}"{""value"":""bar""}"${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
// A cell holding an array of objects is serialized as JSON, and because the
// JSON text contains double quotes the whole field is quoted with each inner
// quote doubled.
await t.step(
  {
    name: "Targeted value: array of objects",
    fn() {
      const columns = [0];
      const data = [
        [[{ value: "foo" }, { value: "bar" }]],
        [[{ value: "baz" }, { value: "qux" }]],
      ];
      const output =
        `0${CRLF}"[{""value"":""foo""},{""value"":""bar""}]"${CRLF}"[{""value"":""baz""},{""value"":""qux""}]"${CRLF}`;
      assertEquals(stringify(data, { columns }), output);
    },
  },
);
await t.step(
{
name: "Targeted value: array",
fn() {
const columns = [0];
const data = [[["foo", "bar"]], [["baz", "qux"]]];
const output =
`0${CRLF}"[""foo"",""bar""]"${CRLF}"[""baz"",""qux""]"${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: array, separator: tab",
fn() {
const columns = [0];
const data = [[["foo", "bar"]], [["baz", "qux"]]];
const output =
`0${CRLF}"[""foo"",""bar""]"${CRLF}"[""baz"",""qux""]"${CRLF}`;
const options = { separator: "\t", columns };
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "Targeted value: undefined",
fn() {
const columns = [0];
const data = [[], []];
const output = `0${CRLF}${CRLF}${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: null",
fn() {
const columns = [0];
const data = [[null], [null]];
const output = `0${CRLF}${CRLF}${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: hex number",
fn() {
const columns = [0];
const data = [[0xa], [0xb]];
const output = `0${CRLF}10${CRLF}11${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: BigInt",
fn() {
const columns = [0];
const data = [[BigInt("1")], [BigInt("2")]];
const output = `0${CRLF}1${CRLF}2${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: boolean",
fn() {
const columns = [0];
const data = [[true], [false]];
const output = `0${CRLF}true${CRLF}false${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: string",
fn() {
const columns = [0];
const data = [["foo"], ["bar"]];
const output = `0${CRLF}foo${CRLF}bar${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: symbol",
fn() {
const columns = [0];
const data = [[Symbol("foo")], [Symbol("bar")]];
const output = `0${CRLF}Symbol(foo)${CRLF}Symbol(bar)${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Targeted value: function",
fn() {
const columns = [0];
const data = [[(n: number) => n]];
const output = `0${CRLF}(n)=>n${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Value with double quote",
fn() {
const columns = [0];
const data = [['foo"']];
const output = `0${CRLF}"foo"""${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Value with CRLF",
fn() {
const columns = [0];
const data = [["foo\r\n"]];
const output = `0${CRLF}"foo\r\n"${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Value with CR",
fn() {
const columns = [0];
const data = [["foo\r"]];
const output = `0${CRLF}foo\r${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Value with LF",
fn() {
const columns = [0];
const data = [["foo\n"]];
const output = `0${CRLF}"foo\n"${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Value with comma",
fn() {
const columns = [0];
const data = [["foo,"]];
const output = `0${CRLF}"foo,"${CRLF}`;
assertEquals(stringify(data, { columns }), output);
},
},
);
await t.step(
{
name: "Value with comma, tab separator",
fn() {
const columns = [0];
const data = [["foo,"]];
const output = `0${CRLF}foo,${CRLF}`;
const options = { separator: "\t", columns };
assertEquals(stringify(data, options), output);
},
},
);
await t.step({
name: "Valid data, no columns",
fn() {
const data = [[1, 2, 3], [4, 5, 6]];
const output = `${CRLF}1,2,3${CRLF}4,5,6${CRLF}`;
assertEquals(stringify(data), output);
},
});
await t.step(
{
name: "byte-order mark with bom=true",
fn() {
const data = [["abc"]];
const output = `${BYTE_ORDER_MARK}abc${CRLF}`;
const options = { headers: false, bom: true };
assertStringIncludes(stringify(data, options), BYTE_ORDER_MARK);
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "no byte-order mark with omitted bom option",
fn() {
const data = [["abc"]];
const output = `abc${CRLF}`;
const options = { headers: false };
assert(!stringify(data, options).includes(BYTE_ORDER_MARK));
assertEquals(stringify(data, options), output);
},
},
);
await t.step(
{
name: "no byte-order mark with bom=false",
fn() {
const data = [["abc"]];
const output = `abc${CRLF}`;
const options = { headers: false, bom: false };
assert(!stringify(data, options).includes(BYTE_ORDER_MARK));
assertEquals(stringify(data, options), output);
},
},
);
},
});

View File

@ -11,417 +11,197 @@
* @module
*/
import { assert } from "../_util/asserts.ts";
import { convertRowToObject, type ReadOptions } from "./csv/_io.ts";
import { Parser } from "./csv/_parser.ts";
export {
/**
* @deprecated (will be removed after 0.181.0) Import from `csv/stringify.ts` instead.
*
* The most essential aspect of a column is accessing the property holding the
* data for that column on each object in the data array. If that member is at
* the top level, `Column` can simply be a property accessor, which is either a
* `string` (if it's a plain object) or a `number` (if it's an array).
*
* ```ts
* const columns = [
* "name",
* ];
* ```
*
* Each property accessor will be used as the header for the column:
*
* | name |
* | :--: |
* | Deno |
*
* - If the required data is not at the top level (it's nested in other
* objects/arrays), then a simple property accessor won't work, so an array of
* them will be required.
*
* ```ts
* const columns = [
* ["repo", "name"],
* ["repo", "org"],
* ];
* ```
*
* When using arrays of property accessors, the header names inherit the value
* of the last accessor in each array:
*
* | name | org |
* | :--: | :------: |
* | deno | denoland |
*
* - If a different column header is desired, then a `ColumnDetails` object type
* can be used for each column:
*
* - **`header?: string`** is the optional value to use for the column header
* name
*
* - **`prop: PropertyAccessor | PropertyAccessor[]`** is the property accessor
* (`string` or `number`) or array of property accessors used to access the
* data on each object
*
* ```ts
* const columns = [
* "name",
* {
* prop: ["runsOn", 0],
* header: "language 1",
* },
* {
* prop: ["runsOn", 1],
* header: "language 2",
* },
* ];
* ```
*
* | name | language 1 | language 2 |
* | :--: | :--------: | :--------: |
* | Deno | Rust | TypeScript |
*/
type Column,
/**
* @deprecated (will be removed after 0.181.0) Import from `csv/stringify.ts` instead.
*
* @param header Explicit column header name. If omitted,
* the (final) property accessor is used for this value.
*
* @param prop Property accessor(s) used to access the value on the object
*/
type ColumnDetails,
/**
* @deprecated (will be removed after 0.181.0) Import from `csv/stringify.ts` instead.
*
* An object (plain or array)
**/
type DataItem,
ERR_BARE_QUOTE,
/** @deprecated (will be removed after 0.181.0) Import from `csv/parse.ts` instead. */
ERR_FIELD_COUNT,
/** @deprecated (will be removed after 0.181.0) Import from `csv/parse.ts` instead. */
ERR_INVALID_DELIM,
/** @deprecated (will be removed after 0.181.0) Import from `csv/parse.ts` instead. */
ERR_QUOTE,
/**
* @deprecated (will be removed after 0.181.0) Import from `csv/parse.ts` instead.
*
* CSV parse helper to manipulate data.
* Provides an auto/custom mapper for columns.
*
* @example
* ```ts
* import { parse } from "https://deno.land/std@$STD_VERSION/encoding/csv.ts";
* const string = "a,b,c\nd,e,f";
*
* console.log(
* await parse(string, {
* skipFirstRow: false,
* }),
* );
* // output:
* // [["a", "b", "c"], ["d", "e", "f"]]
* ```
*
* @param input Input to parse.
* @param opt options of the parser.
* @returns If you don't provide `opt.skipFirstRow` and `opt.columns`, it returns `string[][]`.
* If you provide `opt.skipFirstRow` or `opt.columns`, it returns `Record<string, unknown>[]`.
*/
parse,
/** @deprecated (will be removed after 0.181.0) Import from `csv/parse.ts` instead. */
ParseError,
} from "./csv/_io.ts";
export type { ReadOptions } from "./csv/_io.ts";
/** Character that wraps an escaped field; it is doubled when it occurs inside a field. */
const QUOTE = '"';
/** Line feed; a field containing it is wrapped in quotes. */
const LF = "\n";
/** Record terminator written after the header row and after every data row. */
const CRLF = "\r\n";
/** Written at the very start of the output when the `bom` option is enabled. */
const BYTE_ORDER_MARK = "\ufeff";
/**
* Error thrown by the stringify code in this file, for example when the
* separator contains a forbidden sequence or when a non-numeric accessor is
* applied to an array.
*/
export class StringifyError extends Error {
override readonly name = "StringifyError";
}
/**
 * Turns one cell value into its CSV text representation.
 *
 * - `undefined` and `null` become the empty string.
 * - Objects (including arrays) are serialized with `JSON.stringify`.
 * - Everything else goes through `String(...)`.
 *
 * The text is wrapped in quotes, with every embedded quote doubled, when it
 * contains the separator, a line feed, or a quote character. A lone carriage
 * return does not trigger quoting.
 *
 * @param value The cell value to convert.
 * @param sep The separator in use; its presence in the text forces quoting.
 * @returns The escaped field text.
 */
function getEscapedString(value: unknown, sep: string): string {
  if (value === null || value === undefined) return "";

  const text = typeof value === "object"
    ? JSON.stringify(value)
    : String(value);

  // Checked in the same order as the triggers are listed above.
  const needsQuoting = [sep, LF, QUOTE].some((token) => text.includes(token));
  if (!needsQuoting) return text;

  const doubled = text.replaceAll(QUOTE, `${QUOTE}${QUOTE}`);
  return `${QUOTE}${doubled}${QUOTE}`;
}
/** Key for one lookup step: a `string` for plain objects or a `number` for arrays. */
type PropertyAccessor = number | string;
/**
* Explicit description of a single output column.
*
* @param header Explicit column header name. If omitted,
* the (final) property accessor is used for this value.
*
* @param prop Property accessor(s) used to access the value on the object
*/
export type ColumnDetails = {
/** Header text; when it is not a string, the last accessor in `prop` is stringified and used instead. */
header?: string;
/** A single accessor, or a path of accessors that is followed in order. */
prop: PropertyAccessor | PropertyAccessor[];
};
/**
* The most essential aspect of a column is accessing the property holding the
* data for that column on each object in the data array. If that member is at
* the top level, `Column` can simply be a property accessor, which is either a
* `string` (if it's a plain object) or a `number` (if it's an array).
*
* ```ts
* const columns = [
* "name",
* ];
* ```
*
* Each property accessor will be used as the header for the column:
*
* | name |
* | :--: |
* | Deno |
*
* - If the required data is not at the top level (it's nested in other
* objects/arrays), then a simple property accessor won't work, so an array of
* them will be required.
*
* ```ts
* const columns = [
* ["repo", "name"],
* ["repo", "org"],
* ];
* ```
*
* When using arrays of property accessors, the header names inherit the value
* of the last accessor in each array:
*
* | name | org |
* | :--: | :------: |
* | deno | denoland |
*
* - If a different column header is desired, then a `ColumnDetails` object type
* can be used for each column:
*
* - **`header?: string`** is the optional value to use for the column header
* name
*
* - **`prop: PropertyAccessor | PropertyAccessor[]`** is the property accessor
* (`string` or `number`) or array of property accessors used to access the
* data on each object
*
* ```ts
* const columns = [
* "name",
* {
* prop: ["runsOn", 0],
* header: "language 1",
* },
* {
* prop: ["runsOn", 1],
* header: "language 2",
* },
* ];
* ```
*
* | name | language 1 | language 2 |
* | :--: | :--------: | :--------: |
* | Deno | Rust | TypeScript |
*/
export type Column = ColumnDetails | PropertyAccessor | PropertyAccessor[];
/**
* A `ColumnDetails` after normalization: `header` is always a string and
* `prop` is always an array of accessors.
*/
type NormalizedColumn = Omit<ColumnDetails, "header" | "prop"> & {
header: string;
prop: PropertyAccessor[];
};
/**
 * Brings any accepted `Column` form into the `{ header, prop }` shape.
 *
 * - An accessor array is used as the path as-is (same array instance); its
 *   last element, stringified, becomes the header.
 * - A bare accessor becomes a one-element path and its own header.
 * - A `ColumnDetails` object keeps its `header` when that is a string;
 *   otherwise the last element of its path is stringified.
 *
 * @param column The column definition supplied by the caller.
 * @returns The normalized column.
 */
function normalizeColumn(column: Column): NormalizedColumn {
  // Accessor path given directly.
  if (Array.isArray(column)) {
    return { header: String(column[column.length - 1]), prop: column };
  }

  // Single string/number accessor.
  if (typeof column !== "object") {
    return { header: String(column), prop: [column] };
  }

  // ColumnDetails object.
  const path = Array.isArray(column.prop) ? column.prop : [column.prop];
  const header = typeof column.header === "string"
    ? column.header
    : String(path[path.length - 1]);
  return { header, prop: path };
}
/** A plain object indexed by string keys whose values have not been narrowed yet. */
type ObjectWithStringPropertyKeys = Record<string, unknown>;
/** One input row for `stringify`: an object (plain or array) */
export type DataItem = ObjectWithStringPropertyKeys | unknown[];
/**
 * Collects the cell values for one data item.
 *
 * With at least one column, each column's accessor path is followed from the
 * item; the walk stops early as soon as the current value is `null` or not an
 * object, and whatever value was reached is used for the cell.
 *
 * With no columns, an array item contributes all of its elements, any other
 * object is rejected, and a non-object item is returned as a single cell.
 *
 * @param item The data item (plain object or array).
 * @param normalizedColumns The normalized column definitions.
 * @returns One value per column, or the item's own elements when there are
 * no columns.
 * @throws {StringifyError} If a non-numeric accessor is applied to an array,
 * or if there are no columns and the item is a non-array object.
 */
function getValuesFromItem(
  item: DataItem,
  normalizedColumns: NormalizedColumn[],
): unknown[] {
  if (normalizedColumns.length === 0) {
    if (Array.isArray(item)) return [...item];
    if (typeof item === "object") {
      throw new StringifyError(
        "No property accessor function was provided for object",
      );
    }
    return [item];
  }

  return Array.from(normalizedColumns, (column) => {
    let current: unknown = item;
    for (const key of column.prop) {
      // Nothing left to descend into: keep the value reached so far.
      if (typeof current !== "object" || current === null) break;

      if (!Array.isArray(current)) {
        current = (current as ObjectWithStringPropertyKeys)[key];
      } else if (typeof key === "number") {
        current = current[key];
      } else {
        throw new StringifyError(
          'Property accessor is not of type "number"',
        );
      }
    }
    return current;
  });
}
export type StringifyOptions = {
/** Whether to include the row of headers or not.
/** @deprecated (will be removed after 0.181.0) Import from `csv/parse.ts` instead. */
type ParseOptions,
/** @deprecated (will be removed after 0.181.0) Import from `csv/parse.ts` instead. */
type ReadOptions,
/**
* @deprecated (will be removed after 0.181.0) Import from `csv/stringify.ts` instead.
*
* @default {true}
*/
headers?: boolean;
/**
* Delimiter used to separate values. Examples:
* - `","` _comma_
* - `"\t"` _tab_
* - `"|"` _pipe_
* - etc.
* @param data The source data to stringify. It's an array of items which are
* plain objects or arrays.
*
* @default {","}
*/
separator?: string;
/**
* a list of instructions for how to target and transform the data for each
* column of output. This is also where you can provide an explicit header
* name for the column.
*/
columns?: Column[];
/**
* Whether to add a
* [byte-order mark](https://en.wikipedia.org/wiki/Byte_order_mark) to the
* beginning of the file content. Required by software such as MS Excel to
* properly display Unicode text.
* `DataItem: Record<string, unknown> | unknown[]`
*
* @default {false}
* ```ts
* const data = [
* {
* name: "Deno",
* repo: { org: "denoland", name: "deno" },
* runsOn: ["Rust", "TypeScript"],
* },
* ];
* ```
*
* @example
* ```ts
* import {
* Column,
* stringify,
* } from "https://deno.land/std@$STD_VERSION/encoding/csv.ts";
*
* type Character = {
* age: number;
* name: {
* first: string;
* last: string;
* };
* };
*
* const data: Character[] = [
* {
* age: 70,
* name: {
* first: "Rick",
* last: "Sanchez",
* },
* },
* {
* age: 14,
* name: {
* first: "Morty",
* last: "Smith",
* },
* },
* ];
*
* let columns: Column[] = [
* ["name", "first"],
* "age",
* ];
*
* console.log(stringify(data, { columns }));
* // first,age
* // Rick,70
* // Morty,14
* ```
*
* @param options Output formatting options
*/
bom?: boolean;
};
/**
* @param data The source data to stringify. It's an array of items which are
* plain objects or arrays.
*
* `DataItem: Record<string, unknown> | unknown[]`
*
* ```ts
* const data = [
* {
* name: "Deno",
* repo: { org: "denoland", name: "deno" },
* runsOn: ["Rust", "TypeScript"],
* },
* ];
* ```
*
* @example
* ```ts
* import {
* Column,
* stringify,
* } from "https://deno.land/std@$STD_VERSION/encoding/csv.ts";
*
* type Character = {
* age: number;
* name: {
* first: string;
* last: string;
* };
* };
*
* const data: Character[] = [
* {
* age: 70,
* name: {
* first: "Rick",
* last: "Sanchez",
* },
* },
* {
* age: 14,
* name: {
* first: "Morty",
* last: "Smith",
* },
* },
* ];
*
* let columns: Column[] = [
* ["name", "first"],
* "age",
* ];
*
* console.log(stringify(data, { columns }));
* // first,age
* // Rick,70
* // Morty,14
* ```
*
* @param options Output formatting options
*/
export function stringify(
  data: DataItem[],
  { headers = true, separator: sep = ",", columns = [], bom = false }:
    StringifyOptions = {},
): string {
  // A separator containing a quote or a full CRLF would make the output
  // ambiguous, so it is rejected before anything is produced.
  const separatorIsInvalid = [QUOTE, CRLF].some((token) =>
    sep.includes(token)
  );
  if (separatorIsInvalid) {
    const message = [
      "Separator cannot include the following strings:",
      ' - U+0022: Quotation mark (")',
      " - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)",
    ].join("\n");
    throw new StringifyError(message);
  }

  // Escapes every cell, joins them with the separator and terminates the record.
  const toRecord = (cells: unknown[]): string =>
    cells.map((cell) => getEscapedString(cell, sep)).join(sep) + CRLF;

  const normalizedColumns = columns.map(normalizeColumn);
  const parts: string[] = [];

  if (bom) {
    parts.push(BYTE_ORDER_MARK);
  }

  // The header record is written even when there are no columns, in which
  // case it consists of the terminator only.
  if (headers) {
    parts.push(toRecord(normalizedColumns.map((column) => column.header)));
  }

  for (const item of data) {
    parts.push(toRecord(getValuesFromItem(item, normalizedColumns)));
  }

  return parts.join("");
}
/** Options for `parse`: the reader options plus control over header handling. */
export interface ParseOptions extends ReadOptions {
/**
* If you provide `skipFirstRow: true` and `columns`, the first line will be
* skipped.
* If you provide `skipFirstRow: true` but not `columns`, the first line will
* be skipped and used as header definitions.
*/
skipFirstRow?: boolean;
/**
* List of names used for header definition. When given, these names are used
* even if a header row was read via `skipFirstRow`.
*/
columns?: string[];
}
/**
* Csv parse helper to manipulate data.
* Provides an auto/custom mapper for columns.
*
* @example
* ```ts
* import { parse } from "https://deno.land/std@$STD_VERSION/encoding/csv.ts";
* const string = "a,b,c\nd,e,f";
*
* console.log(
* await parse(string, {
* skipFirstRow: false,
* }),
* );
* // output:
* // [["a", "b", "c"], ["d", "e", "f"]]
* ```
*
* @param input Input to parse.
* @param opt options of the parser.
* @returns If you don't provide `opt.skipFirstRow` and `opt.columns`, it returns `string[][]`.
* If you provide `opt.skipFirstRow` or `opt.columns`, it returns `Record<string, unknown>[]`.
*/
export function parse(
  input: string,
): string[][];
export function parse(
  input: string,
  opt: Omit<ParseOptions, "columns" | "skipFirstRow">,
): string[][];
export function parse(
  input: string,
  opt: Omit<ParseOptions, "columns"> & {
    columns: string[];
  },
): Record<string, unknown>[];
export function parse(
  input: string,
  opt: Omit<ParseOptions, "skipFirstRow"> & {
    skipFirstRow: true;
  },
): Record<string, unknown>[];
export function parse(
  input: string,
  opt: ParseOptions,
): string[][] | Record<string, unknown>[];
export function parse(
  input: string,
  opt: ParseOptions = {
    skipFirstRow: false,
  },
): string[][] | Record<string, unknown>[] {
  const rows = new Parser(opt).parse(input);

  // Without header handling the raw rows are the result.
  const wantsObjects = Boolean(opt.skipFirstRow || opt.columns);
  if (!wantsObjects) {
    return rows;
  }

  let headers: string[] = [];
  if (opt.skipFirstRow) {
    // The first parsed row is consumed as the header row; it must exist.
    const firstRow = rows.shift();
    assert(firstRow != null);
    headers = firstRow;
  }
  // Explicit column names win over a header row read from the input.
  if (opt.columns) {
    headers = opt.columns;
  }

  // Index offset handed to convertRowToObject: 1 when a header row was
  // removed from the front, otherwise 0.
  const indexOffset = opt.skipFirstRow ? 1 : 0;
  return rows.map((row, position) =>
    convertRowToObject(row, headers, indexOffset + position)
  );
}
stringify,
/** @deprecated (will be removed after 0.181.0) Import from `csv/stringify.ts` instead. */
StringifyError,
/** @deprecated (will be removed after 0.181.0) Import from `csv/stringify.ts` instead. */
type StringifyOptions,
} from "../csv/mod.ts";

View File

@ -1,149 +1,7 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
import {
convertRowToObject,
defaultReadOptions,
type LineReader,
parseRecord,
type RowType,
} from "./_io.ts";
import { TextDelimiterStream } from "../../streams/text_delimiter_stream.ts";
/** Options accepted by `CsvStream`. The merged options object is handed to `parseRecord` for every record. */
export interface CsvStreamOptions {
/** Forwarded to `parseRecord`; presumably the field delimiter — confirm against `_io.ts`. */
separator?: string;
/** Forwarded to `parseRecord`; presumably the comment-line prefix — confirm against `_io.ts`. */
comment?: string;
/** When truthy, the first parsed record is not emitted; it is used as the header names unless `columns` is given. */
skipFirstRow?: boolean;
/** Header names. When given, records are emitted as objects built with these names. */
columns?: string[];
}
/**
 * Exposes a `ReadableStreamDefaultReader<string>` through the `LineReader`
 * interface: each chunk read from the stream is treated as one line.
 */
class StreamLineReader implements LineReader {
  #reader: ReadableStreamDefaultReader<string>;
  /** Set once the underlying reader has reported the end of the stream. */
  #done = false;

  constructor(reader: ReadableStreamDefaultReader<string>) {
    this.#reader = reader;
  }

  /**
   * Reads the next chunk.
   *
   * @returns The chunk without a trailing carriage return, or `null` once
   * the stream is exhausted.
   */
  async readLine(): Promise<string | null> {
    const chunk = await this.#reader.read();
    if (!chunk.done) {
      // A trailing CR is dropped for compatibility with golang's `encoding/csv`.
      return stripLastCR(chunk.value);
    }
    this.#done = true;
    return null;
  }

  /** Resolves to whether the end of the stream has already been seen by `readLine`. */
  isEOF(): Promise<boolean> {
    const reachedEnd = this.#done;
    return Promise.resolve(reachedEnd);
  }

  /** Cancels the underlying reader. The promise returned by the reader is not awaited. */
  cancel() {
    this.#reader.cancel();
  }
}
/**
 * Removes a single trailing carriage return, if there is one.
 *
 * @param s The line to trim.
 * @returns `s` without its final `"\r"`, or `s` unchanged when it does not
 * end with one.
 */
function stripLastCR(s: string): string {
  if (!s.endsWith("\r")) {
    return s;
  }
  return s.substring(0, s.length - 1);
}
/**
* Transform stream that takes text on its writable side and produces parsed
* CSV records on its readable side.
*
* Written text is split on "\n" by a `TextDelimiterStream`; each resulting
* line is handed to `parseRecord`. Records are emitted as `string[]`, or as
* objects built by `convertRowToObject` when `skipFirstRow` or `columns` is
* set.
*/
export class CsvStream<T extends CsvStreamOptions>
implements TransformStream<string, RowType<CsvStreamOptions, T>> {
// Output side: records are produced on demand by `#pull`.
readonly #readable: ReadableStream<
string[] | Record<string, string | unknown>
>;
// `defaultReadOptions` overlaid with the caller's options.
readonly #options: CsvStreamOptions;
// Reads lines from the readable side of `#lines`.
readonly #lineReader: StreamLineReader;
// Input side: splits the written text on "\n".
readonly #lines: TextDelimiterStream;
// Counter passed to `parseRecord` and `convertRowToObject`; presumably used
// for error reporting — confirm against `_io.ts`.
#lineIndex = 0;
// True until the first record has been parsed.
#isFirstRow = true;
// Header names used when records are emitted as objects.
#headers: string[] = [];
/**
* @param options Stream options; any option not supplied falls back to the
* value in `defaultReadOptions`.
*/
constructor(options: T = defaultReadOptions as T) {
this.#options = {
...defaultReadOptions,
...options,
};
this.#lines = new TextDelimiterStream("\n");
this.#lineReader = new StreamLineReader(this.#lines.readable.getReader());
this.#readable = new ReadableStream({
pull: (controller) => this.#pull(controller),
cancel: () => this.#lineReader.cancel(),
});
}
/**
* Produces at most one record per call. Calls itself again whenever the
* current line yields nothing to emit (empty line, skipped header row, or a
* record with no fields).
*/
async #pull(
controller: ReadableStreamDefaultController<
string[] | Record<string, string | unknown>
>,
): Promise<void> {
const line = await this.#lineReader.readLine();
if (line === "") {
// Found an empty line
this.#lineIndex++;
return this.#pull(controller);
}
if (line === null) {
// Reached to EOF
controller.close();
this.#lineReader.cancel();
return;
}
// `parseRecord` also receives the line reader; presumably so it can read
// further lines for fields that span several lines — confirm against `_io.ts`.
const record = await parseRecord(
line,
this.#lineReader,
this.#options,
this.#lineIndex,
);
if (record === null) {
// No record could be produced: end the stream.
controller.close();
this.#lineReader.cancel();
return;
}
if (this.#isFirstRow) {
this.#isFirstRow = false;
if (this.#options.skipFirstRow || this.#options.columns) {
this.#headers = [];
if (this.#options.skipFirstRow) {
const head = record;
this.#headers = head;
}
// Explicit column names replace headers taken from the first record.
if (this.#options.columns) {
this.#headers = this.#options.columns;
}
}
if (this.#options.skipFirstRow) {
// The header record is not emitted and `#lineIndex` is not advanced for it.
return this.#pull(controller);
}
}
this.#lineIndex++;
if (record.length > 0) {
if (this.#options.skipFirstRow || this.#options.columns) {
controller.enqueue(convertRowToObject(
record,
this.#headers,
this.#lineIndex,
));
} else {
controller.enqueue(record);
}
} else {
// A record without fields is not emitted; move on to the next line.
return this.#pull(controller);
}
}
/** Readable side: the parsed records. */
get readable() {
return this.#readable as ReadableStream<RowType<CsvStreamOptions, T>>;
}
/** Writable side: the writable end of the internal line splitter. */
get writable(): WritableStream<string> {
return this.#lines.writable;
}
}
export {
/** @deprecated (will be removed after 0.182.0) Import from `csv/stream.ts` instead. */
CsvStream,
/** @deprecated (will be removed after 0.182.0) Import from `csv/stream.ts` instead. */
type CsvStreamOptions,
} from "../../csv/stream.ts";