mirror of
https://github.com/denoland/std.git
synced 2024-11-21 20:50:22 +00:00
fix(encoding/csv): enable skipped tests (denoland/deno#4520)
This commit is contained in:
parent
729753a26a
commit
3d8a63d8c7
178
encoding/csv.ts
178
encoding/csv.ts
@ -1,5 +1,7 @@
|
||||
// Ported from Go:
|
||||
// https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/
|
||||
// Copyright 2011 The Go Authors. All rights reserved. BSD license.
|
||||
// https://github.com/golang/go/blob/master/LICENSE
|
||||
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
|
||||
|
||||
import { BufReader } from "../io/bufio.ts";
|
||||
@ -9,6 +11,11 @@ import { assert } from "../testing/asserts.ts";
|
||||
|
||||
const INVALID_RUNE = ["\r", "\n", '"'];
|
||||
|
||||
export const ERR_BARE_QUOTE = 'bare " in non-quoted-field';
|
||||
export const ERR_QUOTE = 'extraneous or missing " in quoted-field';
|
||||
export const ERR_INVALID_DELIM = "Invalid Delimiter";
|
||||
export const ERR_FIELD_COUNT = "wrong number of fields";
|
||||
|
||||
export class ParseError extends Error {
|
||||
StartLine: number;
|
||||
Line: number;
|
||||
@ -49,23 +56,146 @@ function chkOptions(opt: ReadOptions): void {
|
||||
(typeof opt.comment === "string" && INVALID_RUNE.includes(opt.comment)) ||
|
||||
opt.comma === opt.comment
|
||||
) {
|
||||
throw new Error("Invalid Delimiter");
|
||||
throw new Error(ERR_INVALID_DELIM);
|
||||
}
|
||||
}
|
||||
|
||||
async function read(
|
||||
async function readRecord(
|
||||
Startline: number,
|
||||
reader: BufReader,
|
||||
opt: ReadOptions = { comma: ",", trimLeadingSpace: false }
|
||||
): Promise<string[] | Deno.EOF> {
|
||||
const tp = new TextProtoReader(reader);
|
||||
let line: string;
|
||||
let result: string[] = [];
|
||||
const lineIndex = Startline;
|
||||
let line = await readLine(tp);
|
||||
|
||||
if (line === Deno.EOF) return Deno.EOF;
|
||||
if (line.length === 0) {
|
||||
return [];
|
||||
}
|
||||
// line starting with comment character is ignored
|
||||
if (opt.comment && line[0] === opt.comment) {
|
||||
return [];
|
||||
}
|
||||
|
||||
assert(opt.comma != null);
|
||||
|
||||
let quoteError: string | null = null;
|
||||
const quote = '"';
|
||||
const quoteLen = quote.length;
|
||||
const commaLen = opt.comma.length;
|
||||
let recordBuffer = "";
|
||||
const fieldIndexes = [] as number[];
|
||||
parseField: for (;;) {
|
||||
if (opt.trimLeadingSpace) {
|
||||
line = line.trimLeft();
|
||||
}
|
||||
|
||||
if (line.length === 0 || !line.startsWith(quote)) {
|
||||
// Non-quoted string field
|
||||
const i = line.indexOf(opt.comma);
|
||||
let field = line;
|
||||
if (i >= 0) {
|
||||
field = field.substring(0, i);
|
||||
}
|
||||
// Check to make sure a quote does not appear in field.
|
||||
if (!opt.lazyQuotes) {
|
||||
const j = field.indexOf(quote);
|
||||
if (j >= 0) {
|
||||
quoteError = ERR_BARE_QUOTE;
|
||||
break parseField;
|
||||
}
|
||||
}
|
||||
recordBuffer += field;
|
||||
fieldIndexes.push(recordBuffer.length);
|
||||
if (i >= 0) {
|
||||
line = line.substring(i + commaLen);
|
||||
continue parseField;
|
||||
}
|
||||
break parseField;
|
||||
} else {
|
||||
// Quoted string field
|
||||
line = line.substring(quoteLen);
|
||||
for (;;) {
|
||||
const i = line.indexOf(quote);
|
||||
if (i >= 0) {
|
||||
// Hit next quote.
|
||||
recordBuffer += line.substring(0, i);
|
||||
line = line.substring(i + quoteLen);
|
||||
if (line.startsWith(quote)) {
|
||||
// `""` sequence (append quote).
|
||||
recordBuffer += quote;
|
||||
line = line.substring(quoteLen);
|
||||
} else if (line.startsWith(opt.comma)) {
|
||||
// `","` sequence (end of field).
|
||||
line = line.substring(commaLen);
|
||||
fieldIndexes.push(recordBuffer.length);
|
||||
continue parseField;
|
||||
} else if (0 === line.length) {
|
||||
// `"\n` sequence (end of line).
|
||||
fieldIndexes.push(recordBuffer.length);
|
||||
break parseField;
|
||||
} else if (opt.lazyQuotes) {
|
||||
// `"` sequence (bare quote).
|
||||
recordBuffer += quote;
|
||||
} else {
|
||||
// `"*` sequence (invalid non-escaped quote).
|
||||
quoteError = ERR_QUOTE;
|
||||
break parseField;
|
||||
}
|
||||
} else if (line.length > 0 || !(await isEOF(tp))) {
|
||||
// Hit end of line (copy all data so far).
|
||||
recordBuffer += line;
|
||||
const r = await readLine(tp);
|
||||
if (r === Deno.EOF) {
|
||||
if (!opt.lazyQuotes) {
|
||||
quoteError = ERR_QUOTE;
|
||||
break parseField;
|
||||
}
|
||||
fieldIndexes.push(recordBuffer.length);
|
||||
break parseField;
|
||||
}
|
||||
recordBuffer += "\n"; // preserve line feed (This is because TextProtoReader removes it.)
|
||||
line = r;
|
||||
} else {
|
||||
// Abrupt end of file (EOF on error).
|
||||
if (!opt.lazyQuotes) {
|
||||
quoteError = ERR_QUOTE;
|
||||
break parseField;
|
||||
}
|
||||
fieldIndexes.push(recordBuffer.length);
|
||||
break parseField;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (quoteError) {
|
||||
throw new ParseError(Startline, lineIndex, quoteError);
|
||||
}
|
||||
const result = [] as string[];
|
||||
let preIdx = 0;
|
||||
for (const i of fieldIndexes) {
|
||||
result.push(recordBuffer.slice(preIdx, i));
|
||||
preIdx = i;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
async function isEOF(tp: TextProtoReader): Promise<boolean> {
|
||||
return (await tp.r.peek(0)) === Deno.EOF;
|
||||
}
|
||||
|
||||
async function readLine(tp: TextProtoReader): Promise<string | Deno.EOF> {
|
||||
let line: string;
|
||||
const r = await tp.readLine();
|
||||
if (r === Deno.EOF) return Deno.EOF;
|
||||
line = r;
|
||||
|
||||
// For backwards compatibility, drop trailing \r before EOF.
|
||||
if ((await isEOF(tp)) && line.length > 0 && line[line.length - 1] === "\r") {
|
||||
line = line.substring(0, line.length - 1);
|
||||
}
|
||||
|
||||
// Normalize \r\n to \n on all input lines.
|
||||
if (
|
||||
line.length >= 2 &&
|
||||
@ -76,41 +206,7 @@ async function read(
|
||||
line = line + "\n";
|
||||
}
|
||||
|
||||
const trimmedLine = line.trimLeft();
|
||||
if (trimmedLine.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
// line starting with comment character is ignored
|
||||
if (opt.comment && trimmedLine[0] === opt.comment) {
|
||||
return [];
|
||||
}
|
||||
|
||||
assert(opt.comma != null);
|
||||
result = line.split(opt.comma);
|
||||
|
||||
let quoteError = false;
|
||||
result = result.map((r): string => {
|
||||
if (opt.trimLeadingSpace) {
|
||||
r = r.trimLeft();
|
||||
}
|
||||
if (r[0] === '"' && r[r.length - 1] === '"') {
|
||||
r = r.substring(1, r.length - 1);
|
||||
} else if (r[0] === '"') {
|
||||
r = r.substring(1, r.length);
|
||||
}
|
||||
|
||||
if (!opt.lazyQuotes) {
|
||||
if (r[0] !== '"' && r.indexOf('"') !== -1) {
|
||||
quoteError = true;
|
||||
}
|
||||
}
|
||||
return r;
|
||||
});
|
||||
if (quoteError) {
|
||||
throw new ParseError(Startline, lineIndex, 'bare " in non-quoted-field');
|
||||
}
|
||||
return result;
|
||||
return line;
|
||||
}
|
||||
|
||||
export async function readMatrix(
|
||||
@ -129,7 +225,7 @@ export async function readMatrix(
|
||||
chkOptions(opt);
|
||||
|
||||
for (;;) {
|
||||
const r = await read(lineIndex, reader, opt);
|
||||
const r = await readRecord(lineIndex, reader, opt);
|
||||
if (r === Deno.EOF) break;
|
||||
lineResult = r;
|
||||
lineIndex++;
|
||||
@ -148,7 +244,7 @@ export async function readMatrix(
|
||||
|
||||
if (lineResult.length > 0) {
|
||||
if (_nbFields && _nbFields !== lineResult.length) {
|
||||
throw new ParseError(lineIndex, lineIndex, "wrong number of fields");
|
||||
throw new ParseError(lineIndex, lineIndex, ERR_FIELD_COUNT);
|
||||
}
|
||||
result.push(lineResult);
|
||||
}
|
||||
|
@ -1,15 +1,21 @@
|
||||
// Test ported from Golang
|
||||
// https://github.com/golang/go/blob/2cc15b1/src/encoding/csv/reader_test.go
|
||||
// Copyright 2011 The Go Authors. All rights reserved. BSD license.
|
||||
// https://github.com/golang/go/blob/master/LICENSE
|
||||
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
|
||||
|
||||
import { assertEquals, assert } from "../testing/asserts.ts";
|
||||
import { readMatrix, parse } from "./csv.ts";
|
||||
import {
|
||||
readMatrix,
|
||||
parse,
|
||||
ERR_BARE_QUOTE,
|
||||
ERR_QUOTE,
|
||||
ERR_INVALID_DELIM,
|
||||
ERR_FIELD_COUNT,
|
||||
} from "./csv.ts";
|
||||
import { StringReader } from "../io/readers.ts";
|
||||
import { BufReader } from "../io/bufio.ts";
|
||||
|
||||
const ErrInvalidDelim = "Invalid Delimiter";
|
||||
const ErrFieldCount = "wrong number of fields";
|
||||
const ErrBareQuote = 'bare " in non-quoted-field';
|
||||
|
||||
// TODO(zekth): Activate remaining tests
|
||||
const testCases = [
|
||||
{
|
||||
Name: "Simple",
|
||||
@ -43,7 +49,6 @@ zzz,yyy,xxx`,
|
||||
["a,a", `bbb`, "ccc"],
|
||||
["zzz", "yyy", "xxx"],
|
||||
],
|
||||
ignore: true,
|
||||
},
|
||||
{
|
||||
Name: "NoEOLTest",
|
||||
@ -62,8 +67,7 @@ zzz,yyy,xxx`,
|
||||
line","one line","three
|
||||
line
|
||||
field"`,
|
||||
Output: [["two\nline"], ["one line"], ["three\nline\nfield"]],
|
||||
ignore: true,
|
||||
Output: [["two\nline", "one line", "three\nline\nfield"]],
|
||||
},
|
||||
{
|
||||
Name: "BlankLine",
|
||||
@ -129,7 +133,7 @@ field"`,
|
||||
{
|
||||
Name: "BadDoubleQuotes",
|
||||
Input: `a""b,c`,
|
||||
Error: ErrBareQuote,
|
||||
Error: ERR_BARE_QUOTE,
|
||||
// Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
|
||||
},
|
||||
{
|
||||
@ -141,23 +145,23 @@ field"`,
|
||||
{
|
||||
Name: "BadBareQuote",
|
||||
Input: `a "word","b"`,
|
||||
Error: ErrBareQuote,
|
||||
Error: ERR_BARE_QUOTE,
|
||||
// &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote}
|
||||
},
|
||||
{
|
||||
Name: "BadTrailingQuote",
|
||||
Input: `"a word",b"`,
|
||||
Error: ErrBareQuote,
|
||||
Error: ERR_BARE_QUOTE,
|
||||
},
|
||||
{
|
||||
Name: "ExtraneousQuote",
|
||||
Input: `"a "word","b"`,
|
||||
Error: ErrBareQuote,
|
||||
Error: ERR_QUOTE,
|
||||
},
|
||||
{
|
||||
Name: "BadFieldCount",
|
||||
Input: "a,b,c\nd,e",
|
||||
Error: ErrFieldCount,
|
||||
Error: ERR_FIELD_COUNT,
|
||||
UseFieldsPerRecord: true,
|
||||
FieldsPerRecord: 0,
|
||||
},
|
||||
@ -167,7 +171,7 @@ field"`,
|
||||
// Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount},
|
||||
UseFieldsPerRecord: true,
|
||||
FieldsPerRecord: 2,
|
||||
Error: ErrFieldCount,
|
||||
Error: ERR_FIELD_COUNT,
|
||||
},
|
||||
{
|
||||
Name: "FieldCount",
|
||||
@ -261,22 +265,19 @@ x,,,
|
||||
{
|
||||
Name: "StartLine1", // Issue 19019
|
||||
Input: 'a,"b\nc"d,e',
|
||||
Error: true,
|
||||
Error: ERR_QUOTE,
|
||||
// Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote},
|
||||
ignore: true,
|
||||
},
|
||||
{
|
||||
Name: "StartLine2",
|
||||
Input: 'a,b\n"d\n\n,e',
|
||||
Error: true,
|
||||
Error: ERR_QUOTE,
|
||||
// Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
|
||||
ignore: true,
|
||||
},
|
||||
{
|
||||
Name: "CRLFInQuotedField", // Issue 21201
|
||||
Input: 'A,"Hello\r\nHi",B\r\n',
|
||||
Output: [["A", "Hello\nHi", "B"]],
|
||||
ignore: true,
|
||||
},
|
||||
{
|
||||
Name: "BinaryBlobField", // Issue 19410
|
||||
@ -287,32 +288,27 @@ x,,,
|
||||
Name: "TrailingCR",
|
||||
Input: "field1,field2\r",
|
||||
Output: [["field1", "field2"]],
|
||||
ignore: true,
|
||||
},
|
||||
{
|
||||
Name: "QuotedTrailingCR",
|
||||
Input: '"field"\r',
|
||||
Output: [['"field"']],
|
||||
ignore: true,
|
||||
Output: [["field"]],
|
||||
},
|
||||
{
|
||||
Name: "QuotedTrailingCRCR",
|
||||
Input: '"field"\r\r',
|
||||
Error: true,
|
||||
Error: ERR_QUOTE,
|
||||
// Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote},
|
||||
ignore: true,
|
||||
},
|
||||
{
|
||||
Name: "FieldCR",
|
||||
Input: "field\rfield\r",
|
||||
Output: [["field\rfield"]],
|
||||
ignore: true,
|
||||
},
|
||||
{
|
||||
Name: "FieldCRCR",
|
||||
Input: "field\r\rfield\r\r",
|
||||
Output: [["field\r\rfield\r"]],
|
||||
ignore: true,
|
||||
},
|
||||
{
|
||||
Name: "FieldCRCRLF",
|
||||
@ -328,7 +324,6 @@ x,,,
|
||||
Name: "FieldCRCRLFCRCR",
|
||||
Input: "field\r\r\n\r\rfield\r\r\n\r\r",
|
||||
Output: [["field\r"], ["\r\rfield\r"], ["\r"]],
|
||||
ignore: true,
|
||||
},
|
||||
{
|
||||
Name: "MultiFieldCRCRLFCRCR",
|
||||
@ -338,7 +333,6 @@ x,,,
|
||||
["\r\rfield1", "field2\r"],
|
||||
["\r\r", ""],
|
||||
],
|
||||
ignore: true,
|
||||
},
|
||||
{
|
||||
Name: "NonASCIICommaAndComment",
|
||||
@ -374,12 +368,11 @@ x,,,
|
||||
Name: "QuotedFieldMultipleLF",
|
||||
Input: '"\n\n\n\n"',
|
||||
Output: [["\n\n\n\n"]],
|
||||
ignore: true,
|
||||
},
|
||||
{
|
||||
Name: "MultipleCRLF",
|
||||
Input: "\r\n\r\n\r\n\r\n",
|
||||
ignore: true,
|
||||
Output: [],
|
||||
},
|
||||
/**
|
||||
* The implementation may read each line in several chunks if
|
||||
@ -392,12 +385,12 @@ x,,,
|
||||
"#ignore\n".repeat(10000) + "@".repeat(5000) + "," + "*".repeat(5000),
|
||||
Output: [["@".repeat(5000), "*".repeat(5000)]],
|
||||
Comment: "#",
|
||||
ignore: true,
|
||||
ignore: true, // TODO(#4521)
|
||||
},
|
||||
{
|
||||
Name: "QuoteWithTrailingCRLF",
|
||||
Input: '"foo"bar"\r\n',
|
||||
Error: ErrBareQuote,
|
||||
Error: ERR_QUOTE,
|
||||
// Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote},
|
||||
},
|
||||
{
|
||||
@ -410,58 +403,54 @@ x,,,
|
||||
Name: "DoubleQuoteWithTrailingCRLF",
|
||||
Input: '"foo""bar"\r\n',
|
||||
Output: [[`foo"bar`]],
|
||||
ignore: true,
|
||||
},
|
||||
{
|
||||
Name: "EvenQuotes",
|
||||
Input: `""""""""`,
|
||||
Output: [[`"""`]],
|
||||
ignore: true,
|
||||
},
|
||||
{
|
||||
Name: "OddQuotes",
|
||||
Input: `"""""""`,
|
||||
Error: true,
|
||||
Error: ERR_QUOTE,
|
||||
// Error:" &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote}",
|
||||
ignore: true,
|
||||
},
|
||||
{
|
||||
Name: "LazyOddQuotes",
|
||||
Input: `"""""""`,
|
||||
Output: [[`"""`]],
|
||||
LazyQuotes: true,
|
||||
ignore: true,
|
||||
},
|
||||
{
|
||||
Name: "BadComma1",
|
||||
Comma: "\n",
|
||||
Error: ErrInvalidDelim,
|
||||
Error: ERR_INVALID_DELIM,
|
||||
},
|
||||
{
|
||||
Name: "BadComma2",
|
||||
Comma: "\r",
|
||||
Error: ErrInvalidDelim,
|
||||
Error: ERR_INVALID_DELIM,
|
||||
},
|
||||
{
|
||||
Name: "BadComma3",
|
||||
Comma: '"',
|
||||
Error: ErrInvalidDelim,
|
||||
Error: ERR_INVALID_DELIM,
|
||||
},
|
||||
{
|
||||
Name: "BadComment1",
|
||||
Comment: "\n",
|
||||
Error: ErrInvalidDelim,
|
||||
Error: ERR_INVALID_DELIM,
|
||||
},
|
||||
{
|
||||
Name: "BadComment2",
|
||||
Comment: "\r",
|
||||
Error: ErrInvalidDelim,
|
||||
Error: ERR_INVALID_DELIM,
|
||||
},
|
||||
{
|
||||
Name: "BadCommaComment",
|
||||
Comma: "X",
|
||||
Comment: "X",
|
||||
Error: ErrInvalidDelim,
|
||||
Error: ERR_INVALID_DELIM,
|
||||
},
|
||||
];
|
||||
for (const t of testCases) {
|
||||
|
Loading…
Reference in New Issue
Block a user