diff --git a/csv/_io.ts b/csv/_io.ts index 4fefbcfa9..346818ab7 100644 --- a/csv/_io.ts +++ b/csv/_io.ts @@ -45,7 +45,8 @@ export interface ReadOptions { * If negative, no check is made and records may have a variable number of * fields. * - * If the wrong number of fields is in a row, a `ParseError` is thrown. + * If the wrong number of fields is in a row, a {@linkcode SyntaxError} is + * thrown. */ fieldsPerRecord?: number; } @@ -227,11 +228,13 @@ export function createQuoteErrorMessage( export function convertRowToObject( row: string[], headers: readonly string[], - index: number, + zeroBasedLine: number, ) { if (row.length !== headers.length) { throw new Error( - `Error number of fields line: ${index}\nNumber of fields found: ${headers.length}\nExpected number of fields: ${row.length}`, + `record on line ${ + zeroBasedLine + 1 + } has ${row.length} fields, but the header has ${headers.length} fields`, ); } const out: Record = {}; diff --git a/csv/mod.ts b/csv/mod.ts index d24467dee..5afc62b94 100644 --- a/csv/mod.ts +++ b/csv/mod.ts @@ -56,7 +56,9 @@ * * results in * + * ```ts no-assert * [`the "word" is true`, `a "quoted-field"`] + * ``` * * Newlines and commas may be included in a quoted-field * diff --git a/csv/parse.ts b/csv/parse.ts index 4c4b0e15e..bc2bed875 100644 --- a/csv/parse.ts +++ b/csv/parse.ts @@ -208,7 +208,7 @@ class Parser { this.#input = input.startsWith(BYTE_ORDER_MARK) ? input.slice(1) : input; this.#cursor = 0; const result: string[][] = []; - let _nbFields: number | undefined; + let lineResult: string[]; let first = true; let lineIndex = 0; @@ -225,6 +225,25 @@ class Parser { throw new Error("Invalid Delimiter"); } + // The number of fields per record that is either inferred from the first + // row (when options.fieldsPerRecord = 0), or set by the caller (when + // options.fieldsPerRecord > 0). + // + // Each possible variant means the following: + // "ANY": Variable number of fields is allowed. 
+ // "UNINITIALIZED": The first row has not been read yet. Once it's read, the + // number of fields will be set. + // <number>: The number of fields per record that every record must follow. + let _nbFields: "ANY" | "UNINITIALIZED" | number; + if (options.fieldsPerRecord === undefined || options.fieldsPerRecord < 0) { + _nbFields = "ANY"; + } else if (options.fieldsPerRecord === 0) { + _nbFields = "UNINITIALIZED"; + } else { + // TODO: Should we check if it's a valid integer? + _nbFields = options.fieldsPerRecord; + } + while (true) { const r = this.#parseRecord(lineIndex); if (r === null) break; @@ -234,19 +253,15 @@ class Parser { // the number of fields in the first record if (first) { first = false; - if (options.fieldsPerRecord !== undefined) { - if (options.fieldsPerRecord === 0) { - _nbFields = lineResult.length; - } else { - _nbFields = options.fieldsPerRecord; - } + if (_nbFields === "UNINITIALIZED") { + _nbFields = lineResult.length; } } if (lineResult.length > 0) { - if (_nbFields && _nbFields !== lineResult.length) { + if (typeof _nbFields === "number" && _nbFields !== lineResult.length) { throw new SyntaxError( - `record on line ${lineIndex}: wrong number of fields`, + `record on line ${lineIndex}: expected ${_nbFields} fields but got ${lineResult.length}`, ); } result.push(lineResult); @@ -269,7 +284,7 @@ export interface ParseOptions { * are ignored. With leading whitespace the comment character becomes part of * the field, even you provide `trimLeadingSpace: true`. * - * @default {"#"} + * By default, no character is considered to be a start of a comment. */ comment?: string; /** Flag to trim the leading space of the value. 
@@ -319,11 +334,25 @@ export interface ParseOptions { * @example Usage * ```ts * import { parse } from "@std/csv/parse"; - * import { assertEquals } from "@std/assert"; + * import { assertEquals } from "@std/assert/equals"; * - * const string = "a,b,c\nd,e,f"; + * const string = "a,b,c\n#d,e,f"; * - * assertEquals(parse(string), [["a", "b", "c"], ["d", "e", "f"]]); + * assertEquals(parse(string), [["a", "b", "c"], ["#d", "e", "f"]]); + * ``` + * + * @example Quoted fields + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert/equals"; + * + * const string = `"a ""word""","comma,","newline\n"\nfoo,bar,baz`; + * const result = parse(string); + * + * assertEquals(result, [ + * ['a "word"', "comma,", "newline\n"], + * ["foo", "bar", "baz"] + * ]); * ``` * * @param input The input to parse. @@ -333,26 +362,139 @@ export function parse(input: string): string[][]; /** * Parses CSV string into an array of objects or an array of arrays of strings. * - * If `column` or `skipFirstRow` option is provided, it returns an array of + * If `columns` or `skipFirstRow` option is provided, it returns an array of * objects, otherwise it returns an array of arrays of string. 
* - * @example Usage + * @example Don't skip first row with `skipFirstRow: false` * ```ts * import { parse } from "@std/csv/parse"; - * import { assertEquals } from "@std/assert"; + * import { assertEquals } from "@std/assert/equals"; + * import { assertType, IsExact } from "@std/testing/types" * * const string = "a,b,c\nd,e,f"; + * const result = parse(string, { skipFirstRow: false }); * - * assertEquals(parse(string, { skipFirstRow: false }), [["a", "b", "c"], ["d", "e", "f"]]); - * assertEquals(parse(string, { skipFirstRow: true }), [{ a: "d", b: "e", c: "f" }]); - * assertEquals(parse(string, { columns: ["x", "y", "z"] }), [{ x: "a", y: "b", z: "c" }, { x: "d", y: "e", z: "f" }]); + * assertEquals(result, [["a", "b", "c"], ["d", "e", "f"]]); + * assertType>(true); + * ``` + * + * @example Skip first row with `skipFirstRow: true` + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert/equals"; + * import { assertType, IsExact } from "@std/testing/types" + * + * const string = "a,b,c\nd,e,f"; + * const result = parse(string, { skipFirstRow: true }); + * + * assertEquals(result, [{ a: "d", b: "e", c: "f" }]); + * assertType[]>>(true); + * ``` + * + * @example Specify columns with `columns` option + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert/equals"; + * import { assertType, IsExact } from "@std/testing/types" + * + * const string = "a,b,c\nd,e,f"; + * const result = parse(string, { columns: ["x", "y", "z"] }); + * + * assertEquals(result, [{ x: "a", y: "b", z: "c" }, { x: "d", y: "e", z: "f" }]); + * assertType[]>>(true); + * ``` + * + * @example Specify columns with `columns` option and skip first row with + * `skipFirstRow: true` + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert/equals"; + * import { assertType, IsExact } from "@std/testing/types" + * + * const string = "a,b,c\nd,e,f"; + * const result = 
parse(string, { columns: ["x", "y", "z"], skipFirstRow: true }); + * + * assertEquals(result, [{ x: "d", y: "e", z: "f" }]); + * assertType[]>>(true); + * ``` + * + * @example TSV (tab-separated values) with `separator: "\t"` + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert/equals"; + * + * const string = "a\tb\tc\nd\te\tf"; + * const result = parse(string, { separator: "\t" }); + * + * assertEquals(result, [["a", "b", "c"], ["d", "e", "f"]]); + * ``` + * + * @example Trim leading space with `trimLeadingSpace: true` + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert/equals"; + * + * const string = " a, b, c\n"; + * const result = parse(string, { trimLeadingSpace: true }); + * + * assertEquals(result, [["a", "b", "c"]]); + * ``` + * + * @example Lazy quotes with `lazyQuotes: true` + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert/equals"; + * + * const string = `a "word","1"2",a","b`; + * const result = parse(string, { lazyQuotes: true }); + * + * assertEquals(result, [['a "word"', '1"2', 'a"', 'b']]); + * ``` + * + * @example Set comment prefix with `comment` option + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert/equals"; + * + * const string = "a,b,c\n# THIS IS A COMMENT LINE\nd,e,f"; + * const result = parse(string, { comment: "#" }); + * + * assertEquals(result, [["a", "b", "c"], ["d", "e", "f"]]); + * ``` + * + * @example Infer the number of fields from the first row with `fieldsPerRecord: 0` + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertThrows } from "@std/assert/throws"; + * + * // Note that the second row has more fields than the first row + * const string = "a,b\nc,d,e"; + * assertThrows( + * () => parse(string, { fieldsPerRecord: 0 }), + * SyntaxError, + * "record on line 2: expected 2 fields but got 3", + * ); + * ``` + * + * 
@example Enforce the number of fields for each row with `fieldsPerRecord: 2` + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertThrows } from "@std/assert/throws"; + * + * const string = "a,b\nc,d,e"; + * assertThrows( + * () => parse(string, { fieldsPerRecord: 2 }), + * SyntaxError, + * "record on line 2: expected 2 fields but got 3", + * ); * ``` * * @typeParam T The options' type for parsing. * @param input The input to parse. * @param options The options for parsing. - * @returns If you don't provide `options.skipFirstRow` and `options.columns`, it returns `string[][]`. - * If you provide `options.skipFirstRow` or `options.columns`, it returns `Record[]`. + * @returns If you don't provide `options.skipFirstRow` or `options.columns`, it + * returns `string[][]`. If you provide `options.skipFirstRow` or + * `options.columns`, it returns `Record[]`. */ export function parse( input: string, @@ -378,9 +520,9 @@ export function parse( headers = options.columns; } - const firstLineIndex = options.skipFirstRow ? 1 : 0; + const zeroBasedFirstLineIndex = options.skipFirstRow ? 1 : 0; return r.map((row, i) => { - return convertRowToObject(row, headers, firstLineIndex + i); + return convertRowToObject(row, headers, zeroBasedFirstLineIndex + i); }) as ParseResult; } return r as ParseResult; diff --git a/csv/parse_stream.ts b/csv/parse_stream.ts index 2d975705a..d0304b895 100644 --- a/csv/parse_stream.ts +++ b/csv/parse_stream.ts @@ -23,7 +23,7 @@ export interface CsvParseStreamOptions { * are ignored. With leading whitespace the comment character becomes part of * the field, even you provide `trimLeadingSpace: true`. * - * @default {"#"} + * By default, no character is considered to be a start of a comment. */ comment?: string; /** Flag to trim the leading space of the value. @@ -49,8 +49,8 @@ export interface CsvParseStreamOptions { * If negative, no check is made and records may have a variable number of * fields. 
* - * If the wrong number of fields is in a row, a {@linkcode ParseError} is - * thrown. + * If the wrong number of fields is in a row, a {@linkcode https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/SyntaxError | SyntaxError} + * is thrown. */ fieldsPerRecord?: number; /** @@ -108,17 +108,233 @@ export type RowType = T extends undefined ? string[] * A `CsvParseStream` expects input conforming to * {@link https://www.rfc-editor.org/rfc/rfc4180.html | RFC 4180}. * - * @example Usage - * ```ts no-assert + * @example Usage with default options + * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * import { assertType, IsExact } from "@std/testing/types" * * const source = ReadableStream.from([ - * "name,age", - * "Alice,34", - * "Bob,24", - * "Charlie,45", + * "name,age\n", + * "Alice,34\n", + * "Bob,24\n", * ]); - * const parts = source.pipeThrough(new CsvParseStream()); + * const stream = source.pipeThrough(new CsvParseStream()); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * ["name", "age"], + * ["Alice", "34"], + * ["Bob", "24"], + * ]); + * assertType>(true); + * ``` + * + * @example Skip first row with `skipFirstRow: true` + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * import { assertType, IsExact } from "@std/testing/types" + * + * const source = ReadableStream.from([ + * "name,age\n", + * "Alice,34\n", + * "Bob,24\n", + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ skipFirstRow: true })); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * { name: "Alice", age: "34" }, + * { name: "Bob", age: "24" }, + * ]); + * assertType[]>>(true); + * ``` + * + * @example Specify columns with `columns` option + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } 
from "@std/assert/equals"; + * import { assertType, IsExact } from "@std/testing/types" + * + * const source = ReadableStream.from([ + * "Alice,34\n", + * "Bob,24\n", + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * columns: ["name", "age"] + * })); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * { name: "Alice", age: "34" }, + * { name: "Bob", age: "24" }, + * ]); + * assertType[]>>(true); + * ``` + * + * @example Specify columns with `columns` option and skip first row with + * `skipFirstRow: true` + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * import { assertType, IsExact } from "@std/testing/types" + * + * const source = ReadableStream.from([ + * "Alice,34\n", + * "Bob,24\n", + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * columns: ["name", "age"], + * skipFirstRow: true, + * })); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [{ name: "Bob", age: "24" }]); + * assertType[]>>(true); + * ``` + * + * @example TSV (tab-separated values) with `separator: "\t"` + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * + * const source = ReadableStream.from([ + * "Alice\t34\n", + * "Bob\t24\n", + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * separator: "\t", + * })); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * ["Alice", "34"], + * ["Bob", "24"], + * ]); + * ``` + * + * @example Trim leading space with `trimLeadingSpace: true` + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * + * const source = ReadableStream.from([ + * " Alice,34\n ", + * "Bob, 24\n", + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * trimLeadingSpace: true, + * })); + * const 
result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * ["Alice", "34"], + * ["Bob", "24"], + * ]); + * ``` + * + * @example Quoted fields + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * + * const source = ReadableStream.from([ + * `"a ""word""","com`, + * `ma,","newline`, + * `\n"\nfoo,bar,b`, + * `az\n`, + * ]); + * const stream = source.pipeThrough(new CsvParseStream()); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * ['a "word"', "comma,", "newline\n"], + * ["foo", "bar", "baz"] + * ]); + * ``` + * + * @example Allow lazy quotes with `lazyQuotes: true` + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * + * const source = ReadableStream.from([ + * `a "word","1"`, + * `2",a","b`, + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * lazyQuotes: true, + * })); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [['a "word"', '1"2', 'a"', 'b']]); + * ``` + * + * @example Define comment prefix with `comment` option + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * + * const source = ReadableStream.from([ + * "Alice,34\n", + * "# THIS IS A COMMENT\n", + * "Bob,24\n", + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * comment: "#", + * })); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * ["Alice", "34"], + * ["Bob", "24"], + * ]); + * ``` + * + * @example Infer the number of fields from the first row with + * `fieldsPerRecord: 0` + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * import { assertRejects } from "@std/assert/rejects"; + * + * const source = ReadableStream.from([ + * "Alice,34\n", + * 
"Bob,24,CA\n", // Note that this row has more fields than the first row + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * fieldsPerRecord: 0, + * })); + * const reader = stream.getReader(); + * assertEquals(await reader.read(), { done: false, value: ["Alice", "34"] }); + * await assertRejects( + * () => reader.read(), + * SyntaxError, + * "record on line 2: expected 2 fields but got 3", + * ); + * ``` + * + * @example Enforce the number of fields for each row with `fieldsPerRecord: 2` + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * import { assertRejects } from "@std/assert/rejects"; + * + * const source = ReadableStream.from([ + * "Alice,34\n", + * "Bob,24,CA\n", + * ]); + * const stream = source.pipeThrough(new CsvParseStream({ + * fieldsPerRecord: 2, + * })); + * const reader = stream.getReader(); + * assertEquals(await reader.read(), { done: false, value: ["Alice", "34"] }); + * await assertRejects( + * () => reader.read(), + * SyntaxError, + * "record on line 2: expected 2 fields but got 3", + * ); * ``` * * @typeParam T The type of options for the stream. @@ -258,20 +474,21 @@ export class CsvParseStream< * The instance's {@linkcode ReadableStream}. 
* * @example Usage - * ```ts no-assert + * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; * * const source = ReadableStream.from([ - * "name,age", - * "Alice,34", - * "Bob,24", - * "Charlie,45", + * "name,age\n", + * "Alice,34\n", + * "Bob,24\n", * ]); - * const parseStream = new CsvParseStream(); + * const parseStream = new CsvParseStream({ skipFirstRow: true }); * const parts = source.pipeTo(parseStream.writable); - * for await (const part of parseStream.readable) { - * console.log(part); - * } + * assertEquals(await Array.fromAsync(parseStream.readable), [ + * { name: "Alice", age: "34" }, + * { name: "Bob", age: "24" }, + * ]); * ``` * * @returns The instance's {@linkcode ReadableStream}. @@ -284,20 +501,21 @@ export class CsvParseStream< * The instance's {@linkcode WritableStream}. * * @example Usage - * ```ts no-assert + * ```ts * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; * * const source = ReadableStream.from([ - * "name,age", - * "Alice,34", - * "Bob,24", - * "Charlie,45", + * "name,age\n", + * "Alice,34\n", + * "Bob,24\n", * ]); - * const parseStream = new CsvParseStream(); + * const parseStream = new CsvParseStream({ skipFirstRow: true }); * const parts = source.pipeTo(parseStream.writable); - * for await (const part of parseStream.readable) { - * console.log(part); - * } + * assertEquals(await Array.fromAsync(parseStream.readable), [ + * { name: "Alice", age: "34" }, + * { name: "Bob", age: "24" }, + * ]); * ``` * * @returns The instance's {@linkcode WritableStream}. 
diff --git a/csv/parse_stream_test.ts b/csv/parse_stream_test.ts index 62fa5f742..6288ec4ff 100644 --- a/csv/parse_stream_test.ts +++ b/csv/parse_stream_test.ts @@ -350,14 +350,23 @@ x,,, columns: ["foo", "bar", "baz"], }, { - name: "mismatching number of headers and fields", + name: "mismatching number of headers and fields 1", input: "a,b,c\nd,e", skipFirstRow: true, columns: ["foo", "bar", "baz"], error: { klass: Error, - msg: - "Error number of fields line: 1\nNumber of fields found: 3\nExpected number of fields: 2", + msg: "record on line 2 has 2 fields, but the header has 3 fields", + }, + }, + { + name: "mismatching number of headers and fields 2", + input: "a,b,c\nd,e,,g", + skipFirstRow: true, + columns: ["foo", "bar", "baz"], + error: { + klass: Error, + msg: "record on line 2 has 4 fields, but the header has 3 fields", }, }, { diff --git a/csv/parse_test.ts b/csv/parse_test.ts index e2b3c74ed..f5dae7abc 100644 --- a/csv/parse_test.ts +++ b/csv/parse_test.ts @@ -23,6 +23,7 @@ Deno.test({ ); }, }); + await t.step({ name: "CRLF", fn() { @@ -97,6 +98,42 @@ Deno.test({ }, }); + await t.step({ + name: "BlankField", + fn() { + const input = "a,b,c\nd,,f"; + assertEquals( + parse(input), + [["a", "b", "c"], ["d", "", "f"]], + ); + }, + }); + + await t.step({ + name: "BlankField2", + fn() { + const input = "a,b,c\nd,,f"; + assertEquals( + parse(input, { skipFirstRow: true }), + [{ a: "d", b: "", c: "f" }], + ); + }, + }); + + await t.step({ + name: "BlankField3", + fn() { + const input = "a,b,c\nd,,f"; + assertEquals( + parse(input, { columns: ["one", "two", "three"] }), + [ + { one: "a", two: "b", three: "c" }, + { one: "d", two: "", three: "f" }, + ], + ); + }, + }); + await t.step({ name: "BlankLine", fn() { @@ -257,7 +294,7 @@ Deno.test({ assertThrows( () => parse(input, { fieldsPerRecord: 0 }), SyntaxError, - "record on line 2: wrong number of fields", + "record on line 2: expected 3 fields but got 2", ); }, }); @@ -268,10 +305,21 @@ Deno.test({ 
assertThrows( () => parse(input, { fieldsPerRecord: 2 }), SyntaxError, - "record on line 1: wrong number of fields", + "record on line 1: expected 2 fields but got 3", ); }, }); + await t.step({ + name: "NegativeFieldsPerRecord", + fn() { + const input = `a,b,c\nd,e`; + const output = [ + ["a", "b", "c"], + ["d", "e"], + ]; + assertEquals(parse(input, { fieldsPerRecord: -1 }), output); + }, + }); await t.step({ name: "FieldCount", fn() { @@ -783,7 +831,7 @@ c"d,e`; }, }); await t.step({ - name: "mismatching number of headers and fields", + name: "mismatching number of headers and fields 1", fn() { const input = "a,b,c\nd,e"; assertThrows( @@ -793,7 +841,22 @@ c"d,e`; columns: ["foo", "bar", "baz"], }), Error, - "Error number of fields line: 1\nNumber of fields found: 3\nExpected number of fields: 2", + "record on line 2 has 2 fields, but the header has 3 fields", + ); + }, + }); + await t.step({ + name: "mismatching number of headers and fields 2", + fn() { + const input = "a,b,c\nd,e,,g"; + assertThrows( + () => + parse(input, { + skipFirstRow: true, + columns: ["foo", "bar", "baz"], + }), + Error, + "record on line 2 has 4 fields, but the header has 3 fields", ); }, });