// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
import { CsvParseStream } from "./parse_stream.ts";
import type { CsvParseStreamOptions } from "./parse_stream.ts";
import { assert, assertEquals, assertRejects } from "@std/assert";
import type { AssertTrue, IsExact } from "@std/testing/types";
import { fromFileUrl, join } from "@std/path";
import { delay } from "@std/async/delay";

// Directory holding the CSV fixtures opened by the file-based tests below.
const testdataDir = join(fromFileUrl(import.meta.url), "../testdata");
// Shared encoder used to build byte chunks for in-memory streams.
const encoder = new TextEncoder();

// Pipes a real file through TextDecoderStream and CsvParseStream and checks
// the records that come out.
Deno.test({
  name: "CsvParseStream should work with Deno.FsFile's readable",
  permissions: {
    read: [testdataDir],
  },
  fn: async () => {
    const file = await Deno.open(join(testdataDir, "simple.csv"));
    const readable = file.readable
      .pipeThrough(new TextDecoderStream())
      .pipeThrough(new CsvParseStream());
    const records = await Array.fromAsync(readable);
    assertEquals(records, [
      ["id", "name"],
      ["1", "foobar"],
      ["2", "barbaz"],
    ]);
  },
});

// Feeds the input as four separate chunks. The first two reads yield the two
// well-formed records; the third read must reject because the last chunk
// opens a quoted field that is never closed.
Deno.test({
  name: "CsvParseStream throws at invalid csv line",
  fn: async () => {
    const readable = ReadableStream.from([
      encoder.encode("id,name\n"),
      encoder.encode("\n"),
      encoder.encode("1,foo\n"),
      encoder.encode('2,"baz\n'),
    ]).pipeThrough(new TextDecoderStream()).pipeThrough(
      new CsvParseStream(),
    );
    const reader = readable.getReader();
    assertEquals(await reader.read(), { done: false, value: ["id", "name"] });
    assertEquals(await reader.read(), { done: false, value: ["1", "foo"] });
    await assertRejects(
      () => reader.read(),
      SyntaxError,
      `Syntax error on line 4; parse error on line 5, column 1: extraneous or missing " in quoted-field`,
    );
  },
});

// Table-driven test: every entry of `testCases` becomes one test step. An
// entry carries either `output` (the expected records) or `error` (the
// expected rejection), plus any parser options to apply.
Deno.test({
  name: "CsvParseStream handles various inputs",
  permissions: "none",
  fn: async (t) => {
    // These test cases were originally ported from Go:
    // https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/
    // Copyright 2011 The Go Authors. All rights reserved. BSD license.
    // https://github.com/golang/go/blob/master/LICENSE
    const testCases = [
      {
        name: "CRLF",
        input: "a,b\r\nc,d\r\n",
        output: [["a", "b"], ["c", "d"]],
      },
      {
        name: "BareCR",
        input: "a,b\rc,d\r\n",
        output: [["a", "b\rc", "d"]],
      },
      {
        name: "NoEOLTest",
        input: "a,b,c",
        output: [["a", "b", "c"]],
      },
      {
        name: "Semicolon",
        input: "a;b;c\n",
        output: [["a", "b", "c"]],
        separator: ";",
      },
      {
        name: "Separator is undefined",
        input: "a,b,c\n",
        output: [["a", "b", "c"]],
        separator: undefined,
      },
      {
        name: "MultiLine",
        // Quoted fields containing line feeds. The newlines are written as
        // explicit escapes so the value does not depend on source layout.
        input: '"two\nline","one line","three\nline\nfield"',
        output: [["two\nline", "one line", "three\nline\nfield"]],
      },
      {
        name: "BlankLine",
        input: "a,b,c\n\nd,e,f\n\n",
        output: [
          ["a", "b", "c"],
          ["d", "e", "f"],
        ],
      },
      {
        name: "LeadingSpace",
        input: " a,  b,   c\n",
        output: [[" a", "  b", "   c"]],
      },
      {
        name: "trimLeadingSpace = true",
        input: " a,  b,   c\n",
        output: [["a", "b", "c"]],
        trimLeadingSpace: true,
      },
      {
        name: "Comment",
        input: "#1,2,3\na,b,c\n#comment",
        output: [["a", "b", "c"]],
        comment: "#",
      },
      {
        name: "NoComment",
        input: "#1,2,3\na,b,c",
        output: [
          ["#1", "2", "3"],
          ["a", "b", "c"],
        ],
      },
      {
        name: "fieldsPerRecord - variable number of fields is allowed",
        input: "a,b,c\nd,e",
        output: [
          ["a", "b", "c"],
          ["d", "e"],
        ],
      },
      {
        name: "fieldsPerRecord = -42 - variable number of fields is allowed",
        input: "a,b,c\nd,e",
        output: [
          ["a", "b", "c"],
          ["d", "e"],
        ],
        fieldsPerRecord: -42,
      },
      {
        name:
          "fieldsPerRecord = 0 - the number of fields is inferred from the first row",
        input: "a,b,c\nd,e,f",
        output: [
          ["a", "b", "c"],
          ["d", "e", "f"],
        ],
        fieldsPerRecord: 0,
      },
      {
        name:
          "fieldsPerRecord = 0 - inferred number of fields does not match subsequent rows",
        input: "a,b,c\nd,e",
        fieldsPerRecord: 0,
        error: {
          klass: SyntaxError,
          msg: "Syntax error on line 2: expected 3 fields but got 2",
        },
      },
      {
        // The step name previously said "is not 2", which contradicted both
        // `fieldsPerRecord: 3` and the expected message below.
        name:
          "fieldsPerRecord = 3 - SyntaxError is thrown when the number of fields is not 3",
        input: "a,b,c\nd,e",
        fieldsPerRecord: 3,
        error: {
          klass: SyntaxError,
          msg: "Syntax error on line 2: expected 3 fields but got 2",
        },
      },
      {
        name: "TrailingCommaEOF",
        input: "a,b,c,",
        output: [["a", "b", "c", ""]],
      },
      {
        name: "TrailingCommaEOL",
        input: "a,b,c,\n",
        output: [["a", "b", "c", ""]],
      },
      {
        name: "NotTrailingComma3",
        input: "a,b,c, \n",
        output: [["a", "b", "c", " "]],
      },
      {
        name: "CommaFieldTest",
        // Ten rows, each terminated by a line feed (including the last one).
        // Built with join() so the row boundaries are explicit.
        input: [
          "x,y,z,w",
          "x,y,z,",
          "x,y,,",
          "x,,,",
          ",,,",
          '"x","y","z","w"',
          '"x","y","z",""',
          '"x","y","",""',
          '"x","","",""',
          '"","","",""',
        ].join("\n") + "\n",
        output: [
          ["x", "y", "z", "w"],
          ["x", "y", "z", ""],
          ["x", "y", "", ""],
          ["x", "", "", ""],
          ["", "", "", ""],
          ["x", "y", "z", "w"],
          ["x", "y", "z", ""],
          ["x", "y", "", ""],
          ["x", "", "", ""],
          ["", "", "", ""],
        ],
      },
      {
        name: "CRLFInQuotedField", // Issue 21201
        input: 'A,"Hello\r\nHi",B\r\n',
        output: [["A", "Hello\nHi", "B"]],
      },
      {
        name: "BinaryBlobField", // Issue 19410
        input: "x09\x41\xb4\x1c,aktau",
        output: [["x09A\xb4\x1c", "aktau"]],
      },
      {
        name: "TrailingCR",
        input: "field1,field2\r",
        output: [["field1", "field2"]],
      },
      {
        name: "QuotedTrailingCR",
        input: '"field"\r',
        output: [["field"]],
      },
      {
        name: "FieldCR",
        input: "field\rfield\r",
        output: [["field\rfield"]],
      },
      {
        name: "FieldCRCR",
        input: "field\r\rfield\r\r",
        output: [["field\r\rfield\r"]],
      },
      {
        name: "FieldCRCRLF",
        input: "field\r\r\nfield\r\r\n",
        output: [["field\r"], ["field\r"]],
      },
      {
        name: "FieldCRCRLFCR",
        input: "field\r\r\n\rfield\r\r\n\r",
        output: [["field\r"], ["\rfield\r"]],
      },
      {
        name: "MultiFieldCRCRLFCRCR",
        input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
        output: [
          ["field1", "field2\r"],
          ["\r\rfield1", "field2\r"],
          ["\r\r", ""],
        ],
      },
      {
        name: "NonASCIICommaAndCommentWithQuotes",
        input: 'a€"  b,"€ c\nλ comment\n',
        output: [["a", "  b,", " c"]],
        separator: "€",
        comment: "λ",
      },
      {
        // λ and θ start with the same byte.
        // This tests that the parser doesn't confuse such characters.
        name: "NonASCIICommaConfusion",
        input: '"abθcd"λefθgh',
        output: [["abθcd", "efθgh"]],
        separator: "λ",
        comment: "€",
      },
      {
        name: "NonASCIICommentConfusion",
        input: "λ\nλ\nθ\nλ\n",
        output: [["λ"], ["λ"], ["λ"]],
        comment: "θ",
      },
      {
        name: "QuotedFieldMultipleLF",
        input: '"\n\n\n\n"',
        output: [["\n\n\n\n"]],
      },
      {
        name: "MultipleCRLF",
        input: "\r\n\r\n\r\n\r\n",
        output: [],
      },
      {
        name: "DoubleQuoteWithTrailingCRLF",
        input: '"foo""bar"\r\n',
        output: [[`foo"bar`]],
      },
      {
        name: "EvenQuotes",
        input: `""""""""`,
        output: [[`"""`]],
      },
      {
        name: "simple",
        input: "a,b,c",
        output: [["a", "b", "c"]],
        skipFirstRow: false,
      },
      {
        name: "multiline",
        input: "a,b,c\ne,f,g\n",
        output: [
          ["a", "b", "c"],
          ["e", "f", "g"],
        ],
        skipFirstRow: false,
      },
      {
        name: "header mapping boolean",
        input: "a,b,c\ne,f,g\n",
        output: [{ a: "e", b: "f", c: "g" }],
        skipFirstRow: true,
      },
      {
        name: "header mapping array",
        input: "a,b,c\ne,f,g\n",
        output: [
          { this: "a", is: "b", sparta: "c" },
          { this: "e", is: "f", sparta: "g" },
        ],
        columns: ["this", "is", "sparta"],
      },
      {
        name: "provides both opts.skipFirstRow and opts.columns",
        input: "a,b,1\nc,d,2\ne,f,3",
        output: [
          { foo: "c", bar: "d", baz: "2" },
          { foo: "e", bar: "f", baz: "3" },
        ],
        skipFirstRow: true,
        columns: ["foo", "bar", "baz"],
      },
      {
        name: "mismatching number of headers and fields 1",
        input: "a,b,c\nd,e",
        skipFirstRow: true,
        columns: ["foo", "bar", "baz"],
        error: {
          klass: Error,
          msg:
            "Syntax error on line 2: The record has 2 fields, but the header has 3 fields",
        },
      },
      {
        name: "mismatching number of headers and fields 2",
        input: "a,b,c\nd,e,,g",
        skipFirstRow: true,
        columns: ["foo", "bar", "baz"],
        error: {
          klass: Error,
          msg:
            "Syntax error on line 2: The record has 4 fields, but the header has 3 fields",
        },
      },
      {
        name: "bad quote in bare field",
        input: `a "word",1,2,3`,
        error: {
          klass: SyntaxError,
          msg:
            'Syntax error on line 1; parse error on line 1, column 3: bare " in non-quoted-field',
        },
      },
      {
        name: "bad quote in quoted field",
        input: `"wo"rd",1,2,3`,
        error: {
          klass: SyntaxError,
          msg:
            'Syntax error on line 1; parse error on line 1, column 4: extraneous or missing " in quoted-field',
        },
      },
      {
        name: "bad quote at line 1 in quoted field with newline",
        input: `"w\n\no"rd",1,2,3`,
        error: {
          klass: SyntaxError,
          msg:
            'Syntax error on line 1; parse error on line 3, column 2: extraneous or missing " in quoted-field',
        },
      },
      {
        name: "bad quote at line 2 in quoted field with newline",
        input: `a,b,c,d\n"w\n\no"rd",1,2,3`,
        error: {
          klass: SyntaxError,
          msg:
            'Syntax error on line 2; parse error on line 4, column 2: extraneous or missing " in quoted-field',
        },
      },
      {
        name: "lazy quote",
        input: `a "word","1"2",a","b`,
        output: [[`a "word"`, `1"2`, `a"`, `b`]],
        lazyQuotes: true,
      },
    ];

    for (const testCase of testCases) {
      await t.step(testCase.name, async () => {
        // Copy over only the options the case actually declares. The `in`
        // checks mean a key that is present with value `undefined`
        // (see "Separator is undefined") is still forwarded.
        const options: CsvParseStreamOptions = {};
        if ("separator" in testCase) {
          // @ts-expect-error: explicitly giving undefined
          options.separator = testCase.separator;
        }
        if ("comment" in testCase) {
          options.comment = testCase.comment;
        }
        if ("trimLeadingSpace" in testCase) {
          options.trimLeadingSpace = testCase.trimLeadingSpace;
        }
        if ("lazyQuotes" in testCase) {
          options.lazyQuotes = testCase.lazyQuotes;
        }
        if ("fieldsPerRecord" in testCase) {
          options.fieldsPerRecord = testCase.fieldsPerRecord;
        }
        if ("skipFirstRow" in testCase) {
          options.skipFirstRow = testCase.skipFirstRow;
        }
        if ("columns" in testCase) {
          options.columns = testCase.columns;
        }

        // The whole input is delivered as a single string chunk.
        const readable = ReadableStream.from([testCase.input])
          .pipeThrough(new CsvParseStream(options));

        if (testCase.output) {
          const actual = await Array.fromAsync(readable);
          assertEquals(actual, testCase.output);
        } else {
          assert(testCase.error);
          await assertRejects(
            async () => {
              // Drain the stream; the rejection surfaces during iteration.
              for await (const _ of readable);
            },
            testCase.error.klass,
            testCase.error.msg,
          );
        }
      });
    }
  },
});

// Stops iterating after the first record of a large file. Per the test name,
// this checks that abandoning the stream mid-iteration does not leak the file.
Deno.test({
  name:
    "CsvParseStream.cancel() does not leak file when called in the middle of iteration",
  permissions: { read: [testdataDir] },
  fn: async () => {
    const file = await Deno.open(join(testdataDir, "large.csv"));
    const readable = file.readable
      .pipeThrough(new TextDecoderStream())
      .pipeThrough(new CsvParseStream());
    for await (const _record of readable) {
      break;
    }
    // FIXME(kt3k): Remove this delay.
    // NOTE(review): presumably this gives the runtime time to release the
    // file before the test finishes — confirm before removing.
    await delay(100);
  },
});

// Compile-time checks only: each braced scope constructs a CsvParseStream
// with a particular options shape and asserts the exact type of `readable`.
//
// NOTE(review): the generic type arguments in this test were reconstructed
// from their surviving bracket structure. In particular, confirm whether the
// record value type for unknown column names is `string` (used here) or
// `string | undefined` against the declarations in parse_stream.ts.
Deno.test({
  name: "CsvParseStream is correctly typed",
  fn() {
    // If no option is passed, defaults to ReadableStream<string[]>.
    {
      const { readable } = new CsvParseStream();
      type _ = AssertTrue<IsExact<typeof readable, ReadableStream<string[]>>>;
    }
    {
      const { readable } = new CsvParseStream(undefined);
      type _ = AssertTrue<IsExact<typeof readable, ReadableStream<string[]>>>;
    }
    {
      // `skipFirstRow` may be `true` or `false`.
      // `columns` may be `undefined` or `string[]`.
      // If you don't know exactly what the value of the option is,
      // the return type is ReadableStream<string[] | Record<string, string>>
      const options: CsvParseStreamOptions = {};
      const { readable } = new CsvParseStream(options);
      type _ = AssertTrue<
        IsExact<
          typeof readable,
          ReadableStream<string[] | Record<string, string>>
        >
      >;
    }
    {
      const { readable } = new CsvParseStream({});
      type _ = AssertTrue<IsExact<typeof readable, ReadableStream<string[]>>>;
    }

    // skipFirstRow option
    {
      const { readable } = new CsvParseStream({});
      type _ = AssertTrue<IsExact<typeof readable, ReadableStream<string[]>>>;
    }
    {
      const { readable } = new CsvParseStream({ skipFirstRow: false });
      type _ = AssertTrue<IsExact<typeof readable, ReadableStream<string[]>>>;
    }
    {
      const { readable } = new CsvParseStream({ skipFirstRow: true });
      type _ = AssertTrue<
        IsExact<
          typeof readable,
          ReadableStream<Record<string, string>>
        >
      >;
    }

    // columns option
    {
      const { readable } = new CsvParseStream({});
      type _ = AssertTrue<IsExact<typeof readable, ReadableStream<string[]>>>;
    }
    {
      const { readable } = new CsvParseStream({ columns: ["aaa", "bbb"] });
      type _ = AssertTrue<
        IsExact<typeof readable, ReadableStream<Record<"aaa" | "bbb", string>>>
      >;
    }
    {
      const { readable } = new CsvParseStream({ columns: ["aaa"] as string[] });
      type _ = AssertTrue<
        IsExact<
          typeof readable,
          ReadableStream<Record<string, string>>
        >
      >;
    }

    // skipFirstRow option + columns option
    {
      const { readable } = new CsvParseStream({ skipFirstRow: false });
      type _ = AssertTrue<IsExact<typeof readable, ReadableStream<string[]>>>;
    }
    {
      const { readable } = new CsvParseStream({ skipFirstRow: true });
      type _ = AssertTrue<
        IsExact<
          typeof readable,
          ReadableStream<Record<string, string>>
        >
      >;
    }
    {
      const { readable } = new CsvParseStream({
        skipFirstRow: false,
        columns: ["aaa"],
      });
      type _ = AssertTrue<
        IsExact<typeof readable, ReadableStream<Record<"aaa", string>>>
      >;
    }
    {
      const { readable } = new CsvParseStream({
        skipFirstRow: true,
        columns: ["aaa"],
      });
      type _ = AssertTrue<
        IsExact<typeof readable, ReadableStream<Record<"aaa", string>>>
      >;
    }
  },
});