fix(encoding/csv): enable skipped tests (denoland/deno#4520)

This commit is contained in:
uki00a 2020-03-31 00:37:58 +09:00 committed by denobot
parent 729753a26a
commit 3d8a63d8c7
2 changed files with 170 additions and 85 deletions

View File

@ -1,5 +1,7 @@
// Ported from Go:
// https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/
// Copyright 2011 The Go Authors. All rights reserved. BSD license.
// https://github.com/golang/go/blob/master/LICENSE
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
import { BufReader } from "../io/bufio.ts";
@ -9,6 +11,11 @@ import { assert } from "../testing/asserts.ts";
// Characters that option validation refuses as a comment character
// (presumably also as the field delimiter — that part of the check is not
// visible here; confirm in chkOptions).
const INVALID_RUNE = ["\r", "\n", '"'];
/** Message used when a `"` appears inside a field that did not start with a quote. */
export const ERR_BARE_QUOTE = 'bare " in non-quoted-field';
/** Message used when a quoted field has a stray `"` or is never closed. */
export const ERR_QUOTE = 'extraneous or missing " in quoted-field';
/** Message of the error thrown when the delimiter/comment options are rejected. */
export const ERR_INVALID_DELIM = "Invalid Delimiter";
/** Message used when a record's field count differs from the expected count. */
export const ERR_FIELD_COUNT = "wrong number of fields";
export class ParseError extends Error {
StartLine: number;
Line: number;
@ -49,23 +56,146 @@ function chkOptions(opt: ReadOptions): void {
(typeof opt.comment === "string" && INVALID_RUNE.includes(opt.comment)) ||
opt.comma === opt.comment
) {
throw new Error("Invalid Delimiter");
throw new Error(ERR_INVALID_DELIM);
}
}
async function read(
/**
 * Reads a single CSV record from `reader`.
 *
 * A record normally occupies one input line, but a quoted field may contain
 * line breaks; in that case further lines are consumed until the closing
 * quote is found.
 *
 * @param Startline Line number reported as the start of the record in any
 *   `ParseError` thrown from here.
 * @param reader Buffered reader the lines are pulled from.
 * @param opt Parsing options; `comma`, `comment`, `trimLeadingSpace` and
 *   `lazyQuotes` are consulted.
 * @returns The fields of the record, an empty array for an empty line or a
 *   comment line, or `Deno.EOF` when no input is left.
 * @throws ParseError carrying `ERR_BARE_QUOTE` or `ERR_QUOTE` when quoting is
 *   malformed and `opt.lazyQuotes` is not set.
 */
async function readRecord(
  Startline: number,
  reader: BufReader,
  opt: ReadOptions = { comma: ",", trimLeadingSpace: false }
): Promise<string[] | Deno.EOF> {
  const tp = new TextProtoReader(reader);
  // Line reported for a parse error. It starts at the record's first line and
  // advances each time a quoted field pulls in another input line, so that
  // ParseError.Line can differ from ParseError.StartLine.
  let lineIndex = Startline;
  let line = await readLine(tp);

  if (line === Deno.EOF) return Deno.EOF;
  if (line.length === 0) {
    return [];
  }
  // line starting with comment character is ignored
  if (opt.comment && line[0] === opt.comment) {
    return [];
  }

  assert(opt.comma != null);

  let quoteError: string | null = null;
  const quote = '"';
  const quoteLen = quote.length;
  const commaLen = opt.comma.length;
  // Field contents are appended back to back into recordBuffer;
  // fieldIndexes remembers the end offset of every field so the buffer can
  // be sliced into individual fields once the record is complete.
  let recordBuffer = "";
  const fieldIndexes: number[] = [];
  parseField: for (;;) {
    if (opt.trimLeadingSpace) {
      line = line.trimStart();
    }

    if (line.length === 0 || !line.startsWith(quote)) {
      // Non-quoted string field
      const i = line.indexOf(opt.comma);
      let field = line;
      if (i >= 0) {
        field = field.substring(0, i);
      }
      // Check to make sure a quote does not appear in field.
      if (!opt.lazyQuotes) {
        const j = field.indexOf(quote);
        if (j >= 0) {
          quoteError = ERR_BARE_QUOTE;
          break parseField;
        }
      }
      recordBuffer += field;
      fieldIndexes.push(recordBuffer.length);
      if (i >= 0) {
        // Skip past the delimiter and parse the next field.
        line = line.substring(i + commaLen);
        continue parseField;
      }
      // No delimiter left: this was the last field of the record.
      break parseField;
    } else {
      // Quoted string field
      line = line.substring(quoteLen);
      for (;;) {
        const i = line.indexOf(quote);
        if (i >= 0) {
          // Hit next quote.
          recordBuffer += line.substring(0, i);
          line = line.substring(i + quoteLen);
          if (line.startsWith(quote)) {
            // `""` sequence (append quote).
            recordBuffer += quote;
            line = line.substring(quoteLen);
          } else if (line.startsWith(opt.comma)) {
            // `","` sequence (end of field).
            line = line.substring(commaLen);
            fieldIndexes.push(recordBuffer.length);
            continue parseField;
          } else if (0 === line.length) {
            // `"\n` sequence (end of line).
            fieldIndexes.push(recordBuffer.length);
            break parseField;
          } else if (opt.lazyQuotes) {
            // `"` sequence (bare quote).
            recordBuffer += quote;
          } else {
            // `"*` sequence (invalid non-escaped quote).
            quoteError = ERR_QUOTE;
            break parseField;
          }
        } else if (line.length > 0 || !(await isEOF(tp))) {
          // Hit end of line (copy all data so far).
          recordBuffer += line;
          const r = await readLine(tp);
          if (r === Deno.EOF) {
            // Input ended inside a quoted field.
            if (!opt.lazyQuotes) {
              quoteError = ERR_QUOTE;
              break parseField;
            }
            fieldIndexes.push(recordBuffer.length);
            break parseField;
          }
          // The quoted field continues on the line that was just read.
          lineIndex++;
          recordBuffer += "\n"; // preserve line feed (This is because TextProtoReader removes it.)
          line = r;
        } else {
          // Abrupt end of file (EOF on error).
          if (!opt.lazyQuotes) {
            quoteError = ERR_QUOTE;
            break parseField;
          }
          fieldIndexes.push(recordBuffer.length);
          break parseField;
        }
      }
    }
  }
  if (quoteError) {
    throw new ParseError(Startline, lineIndex, quoteError);
  }
  // Cut the concatenated buffer back into fields at the recorded offsets.
  const result: string[] = [];
  let preIdx = 0;
  for (const i of fieldIndexes) {
    result.push(recordBuffer.slice(preIdx, i));
    preIdx = i;
  }
  return result;
}
/**
 * Reports whether the reader underlying `tp` has no more input, determined
 * by a zero-length peek that does not consume anything.
 */
async function isEOF(tp: TextProtoReader): Promise<boolean> {
  const peeked = await tp.r.peek(0);
  return peeked === Deno.EOF;
}
async function readLine(tp: TextProtoReader): Promise<string | Deno.EOF> {
let line: string;
const r = await tp.readLine();
if (r === Deno.EOF) return Deno.EOF;
line = r;
// For backwards compatibility, drop trailing \r before EOF.
if ((await isEOF(tp)) && line.length > 0 && line[line.length - 1] === "\r") {
line = line.substring(0, line.length - 1);
}
// Normalize \r\n to \n on all input lines.
if (
line.length >= 2 &&
@ -76,41 +206,7 @@ async function read(
line = line + "\n";
}
const trimmedLine = line.trimLeft();
if (trimmedLine.length === 0) {
return [];
}
// line starting with comment character is ignored
if (opt.comment && trimmedLine[0] === opt.comment) {
return [];
}
assert(opt.comma != null);
result = line.split(opt.comma);
let quoteError = false;
result = result.map((r): string => {
if (opt.trimLeadingSpace) {
r = r.trimLeft();
}
if (r[0] === '"' && r[r.length - 1] === '"') {
r = r.substring(1, r.length - 1);
} else if (r[0] === '"') {
r = r.substring(1, r.length);
}
if (!opt.lazyQuotes) {
if (r[0] !== '"' && r.indexOf('"') !== -1) {
quoteError = true;
}
}
return r;
});
if (quoteError) {
throw new ParseError(Startline, lineIndex, 'bare " in non-quoted-field');
}
return result;
return line;
}
export async function readMatrix(
@ -129,7 +225,7 @@ export async function readMatrix(
chkOptions(opt);
for (;;) {
const r = await read(lineIndex, reader, opt);
const r = await readRecord(lineIndex, reader, opt);
if (r === Deno.EOF) break;
lineResult = r;
lineIndex++;
@ -148,7 +244,7 @@ export async function readMatrix(
if (lineResult.length > 0) {
if (_nbFields && _nbFields !== lineResult.length) {
throw new ParseError(lineIndex, lineIndex, "wrong number of fields");
throw new ParseError(lineIndex, lineIndex, ERR_FIELD_COUNT);
}
result.push(lineResult);
}

View File

@ -1,15 +1,21 @@
// Test ported from Golang
// https://github.com/golang/go/blob/2cc15b1/src/encoding/csv/reader_test.go
// Copyright 2011 The Go Authors. All rights reserved. BSD license.
// https://github.com/golang/go/blob/master/LICENSE
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
import { assertEquals, assert } from "../testing/asserts.ts";
import { readMatrix, parse } from "./csv.ts";
import {
readMatrix,
parse,
ERR_BARE_QUOTE,
ERR_QUOTE,
ERR_INVALID_DELIM,
ERR_FIELD_COUNT,
} from "./csv.ts";
import { StringReader } from "../io/readers.ts";
import { BufReader } from "../io/bufio.ts";
const ErrInvalidDelim = "Invalid Delimiter";
const ErrFieldCount = "wrong number of fields";
const ErrBareQuote = 'bare " in non-quoted-field';
// TODO(zekth): Activate remaining tests
const testCases = [
{
Name: "Simple",
@ -43,7 +49,6 @@ zzz,yyy,xxx`,
["a,a", `bbb`, "ccc"],
["zzz", "yyy", "xxx"],
],
ignore: true,
},
{
Name: "NoEOLTest",
@ -62,8 +67,7 @@ zzz,yyy,xxx`,
line","one line","three
line
field"`,
Output: [["two\nline"], ["one line"], ["three\nline\nfield"]],
ignore: true,
Output: [["two\nline", "one line", "three\nline\nfield"]],
},
{
Name: "BlankLine",
@ -129,7 +133,7 @@ field"`,
{
Name: "BadDoubleQuotes",
Input: `a""b,c`,
Error: ErrBareQuote,
Error: ERR_BARE_QUOTE,
// Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
},
{
@ -141,23 +145,23 @@ field"`,
{
Name: "BadBareQuote",
Input: `a "word","b"`,
Error: ErrBareQuote,
Error: ERR_BARE_QUOTE,
// &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote}
},
{
Name: "BadTrailingQuote",
Input: `"a word",b"`,
Error: ErrBareQuote,
Error: ERR_BARE_QUOTE,
},
{
Name: "ExtraneousQuote",
Input: `"a "word","b"`,
Error: ErrBareQuote,
Error: ERR_QUOTE,
},
{
Name: "BadFieldCount",
Input: "a,b,c\nd,e",
Error: ErrFieldCount,
Error: ERR_FIELD_COUNT,
UseFieldsPerRecord: true,
FieldsPerRecord: 0,
},
@ -167,7 +171,7 @@ field"`,
// Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount},
UseFieldsPerRecord: true,
FieldsPerRecord: 2,
Error: ErrFieldCount,
Error: ERR_FIELD_COUNT,
},
{
Name: "FieldCount",
@ -261,22 +265,19 @@ x,,,
{
Name: "StartLine1", // Issue 19019
Input: 'a,"b\nc"d,e',
Error: true,
Error: ERR_QUOTE,
// Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote},
ignore: true,
},
{
Name: "StartLine2",
Input: 'a,b\n"d\n\n,e',
Error: true,
Error: ERR_QUOTE,
// Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
ignore: true,
},
{
Name: "CRLFInQuotedField", // Issue 21201
Input: 'A,"Hello\r\nHi",B\r\n',
Output: [["A", "Hello\nHi", "B"]],
ignore: true,
},
{
Name: "BinaryBlobField", // Issue 19410
@ -287,32 +288,27 @@ x,,,
Name: "TrailingCR",
Input: "field1,field2\r",
Output: [["field1", "field2"]],
ignore: true,
},
{
Name: "QuotedTrailingCR",
Input: '"field"\r',
Output: [['"field"']],
ignore: true,
Output: [["field"]],
},
{
Name: "QuotedTrailingCRCR",
Input: '"field"\r\r',
Error: true,
Error: ERR_QUOTE,
// Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote},
ignore: true,
},
{
Name: "FieldCR",
Input: "field\rfield\r",
Output: [["field\rfield"]],
ignore: true,
},
{
Name: "FieldCRCR",
Input: "field\r\rfield\r\r",
Output: [["field\r\rfield\r"]],
ignore: true,
},
{
Name: "FieldCRCRLF",
@ -328,7 +324,6 @@ x,,,
Name: "FieldCRCRLFCRCR",
Input: "field\r\r\n\r\rfield\r\r\n\r\r",
Output: [["field\r"], ["\r\rfield\r"], ["\r"]],
ignore: true,
},
{
Name: "MultiFieldCRCRLFCRCR",
@ -338,7 +333,6 @@ x,,,
["\r\rfield1", "field2\r"],
["\r\r", ""],
],
ignore: true,
},
{
Name: "NonASCIICommaAndComment",
@ -374,12 +368,11 @@ x,,,
Name: "QuotedFieldMultipleLF",
Input: '"\n\n\n\n"',
Output: [["\n\n\n\n"]],
ignore: true,
},
{
Name: "MultipleCRLF",
Input: "\r\n\r\n\r\n\r\n",
ignore: true,
Output: [],
},
/**
* The implementation may read each line in several chunks if
@ -392,12 +385,12 @@ x,,,
"#ignore\n".repeat(10000) + "@".repeat(5000) + "," + "*".repeat(5000),
Output: [["@".repeat(5000), "*".repeat(5000)]],
Comment: "#",
ignore: true,
ignore: true, // TODO(#4521)
},
{
Name: "QuoteWithTrailingCRLF",
Input: '"foo"bar"\r\n',
Error: ErrBareQuote,
Error: ERR_QUOTE,
// Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote},
},
{
@ -410,58 +403,54 @@ x,,,
Name: "DoubleQuoteWithTrailingCRLF",
Input: '"foo""bar"\r\n',
Output: [[`foo"bar`]],
ignore: true,
},
{
Name: "EvenQuotes",
Input: `""""""""`,
Output: [[`"""`]],
ignore: true,
},
{
Name: "OddQuotes",
Input: `"""""""`,
Error: true,
Error: ERR_QUOTE,
// Error:" &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote}",
ignore: true,
},
{
Name: "LazyOddQuotes",
Input: `"""""""`,
Output: [[`"""`]],
LazyQuotes: true,
ignore: true,
},
{
Name: "BadComma1",
Comma: "\n",
Error: ErrInvalidDelim,
Error: ERR_INVALID_DELIM,
},
{
Name: "BadComma2",
Comma: "\r",
Error: ErrInvalidDelim,
Error: ERR_INVALID_DELIM,
},
{
Name: "BadComma3",
Comma: '"',
Error: ErrInvalidDelim,
Error: ERR_INVALID_DELIM,
},
{
Name: "BadComment1",
Comment: "\n",
Error: ErrInvalidDelim,
Error: ERR_INVALID_DELIM,
},
{
Name: "BadComment2",
Comment: "\r",
Error: ErrInvalidDelim,
Error: ERR_INVALID_DELIM,
},
{
Name: "BadCommaComment",
Comma: "X",
Comment: "X",
Error: ErrInvalidDelim,
Error: ERR_INVALID_DELIM,
},
];
for (const t of testCases) {