fix(csv): show 1-based line and column numbers in error messages (#5604)

This commit is contained in:
Yusuke Tanaka 2024-08-02 00:07:52 +09:00 committed by GitHub
parent b5181c1b6a
commit 89770fc76a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 108 additions and 43 deletions

View File

@ -64,8 +64,8 @@ export async function parseRecord(
fullLine: string,
reader: LineReader,
options: ReadOptions,
startLine: number,
lineIndex: number = startLine,
zeroBasedRecordStartLine: number,
zeroBasedLine: number = zeroBasedRecordStartLine,
): Promise<Array<string>> {
// line starting with comment character is ignored
if (options.comment && fullLine[0] === options.comment) {
@ -102,7 +102,11 @@ export async function parseRecord(
fullLine.slice(0, fullLine.length - line.slice(j).length),
);
throw new SyntaxError(
createBareQuoteErrorMessage(startLine + 1, lineIndex, col),
createBareQuoteErrorMessage(
zeroBasedRecordStartLine,
zeroBasedLine,
col,
),
);
}
}
@ -144,14 +148,17 @@ export async function parseRecord(
fullLine.slice(0, fullLine.length - line.length - quoteLen),
);
throw new SyntaxError(
createQuoteErrorMessage(startLine + 1, lineIndex, col),
createQuoteErrorMessage(
zeroBasedRecordStartLine,
zeroBasedLine,
col,
),
);
}
} else if (line.length > 0 || !reader.isEOF()) {
// Hit end of line (copy all data so far).
recordBuffer += line;
const r = await reader.readLine();
lineIndex++;
line = r ?? ""; // This is a workaround for making this module behave similarly to the encoding/csv/reader.go.
fullLine = line;
if (r === null) {
@ -159,19 +166,28 @@ export async function parseRecord(
if (!options.lazyQuotes) {
const col = codePointLength(fullLine);
throw new SyntaxError(
createQuoteErrorMessage(startLine + 1, lineIndex, col),
createQuoteErrorMessage(
zeroBasedRecordStartLine,
zeroBasedLine,
col,
),
);
}
fieldIndexes.push(recordBuffer.length);
break parseField;
}
zeroBasedLine++;
recordBuffer += "\n"; // preserve line feed (This is because TextProtoReader removes it.)
} else {
// Abrupt end of file (EOF on error).
if (!options.lazyQuotes) {
const col = codePointLength(fullLine);
throw new SyntaxError(
createQuoteErrorMessage(startLine + 1, lineIndex, col),
createQuoteErrorMessage(
zeroBasedRecordStartLine,
zeroBasedLine,
col,
),
);
}
fieldIndexes.push(recordBuffer.length);
@ -190,18 +206,22 @@ export async function parseRecord(
}
export function createBareQuoteErrorMessage(
start: number,
line: number,
column: number,
zeroBasedRecordStartLine: number,
zeroBasedLine: number,
zeroBasedColumn: number,
) {
return `record on line ${start}; parse error on line ${line}, column ${column}: bare " in non-quoted-field`;
return `record on line ${zeroBasedRecordStartLine + 1}; parse error on line ${
zeroBasedLine + 1
}, column ${zeroBasedColumn + 1}: bare " in non-quoted-field`;
}
export function createQuoteErrorMessage(
start: number,
line: number,
column: number,
zeroBasedRecordStartLine: number,
zeroBasedLine: number,
zeroBasedColumn: number,
) {
return `record on line ${start}; parse error on line ${line}, column ${column}: extraneous or missing " in quoted-field`;
return `record on line ${zeroBasedRecordStartLine + 1}; parse error on line ${
zeroBasedLine + 1
}, column ${zeroBasedColumn + 1}: extraneous or missing " in quoted-field`;
}
export function convertRowToObject(

View File

@ -70,14 +70,14 @@ class Parser {
#isEOF(): boolean {
return this.#cursor >= this.#input.length;
}
#parseRecord(startLine: number): string[] | null {
#parseRecord(zeroBasedStartLine: number): string[] | null {
let fullLine = this.#readLine();
if (fullLine === null) return null;
if (fullLine.length === 0) {
return [];
}
let lineIndex = startLine + 1;
let zeroBasedLine = zeroBasedStartLine;
// line starting with comment character is ignored
if (this.#options.comment && fullLine[0] === this.#options.comment) {
@ -110,7 +110,11 @@ class Parser {
fullLine.slice(0, fullLine.length - line.slice(j).length),
);
throw new SyntaxError(
createBareQuoteErrorMessage(startLine + 1, lineIndex, col),
createBareQuoteErrorMessage(
zeroBasedStartLine,
zeroBasedLine,
col,
),
);
}
}
@ -152,14 +156,13 @@ class Parser {
fullLine.slice(0, fullLine.length - line.length - quoteLen),
);
throw new SyntaxError(
createQuoteErrorMessage(startLine + 1, lineIndex, col),
createQuoteErrorMessage(zeroBasedStartLine, zeroBasedLine, col),
);
}
} else if (line.length > 0 || !(this.#isEOF())) {
// Hit end of line (copy all data so far).
recordBuffer += line;
const r = this.#readLine();
lineIndex++;
line = r ?? ""; // This is a workaround for making this module behave similarly to the encoding/csv/reader.go.
fullLine = line;
if (r === null) {
@ -167,19 +170,24 @@ class Parser {
if (!this.#options.lazyQuotes) {
const col = codePointLength(fullLine);
throw new SyntaxError(
createQuoteErrorMessage(startLine + 1, lineIndex, col),
createQuoteErrorMessage(
zeroBasedStartLine,
zeroBasedLine,
col,
),
);
}
fieldIndexes.push(recordBuffer.length);
break parseField;
}
zeroBasedLine++;
recordBuffer += "\n"; // preserve line feed (This is because TextProtoReader removes it.)
} else {
// Abrupt end of file (EOF on error).
if (!this.#options.lazyQuotes) {
const col = codePointLength(fullLine);
throw new SyntaxError(
createQuoteErrorMessage(startLine + 1, lineIndex, col),
createQuoteErrorMessage(zeroBasedStartLine, zeroBasedLine, col),
);
}
fieldIndexes.push(recordBuffer.length);

View File

@ -132,7 +132,7 @@ export class CsvParseStream<
readonly #options: CsvParseStreamOptions;
readonly #lineReader: StreamLineReader;
readonly #lines: TextDelimiterStream;
#lineIndex = 0;
#zeroBasedLineIndex = 0;
#isFirstRow = true;
// The number of fields per record that is either inferred from the first row
@ -186,7 +186,7 @@ export class CsvParseStream<
const line = await this.#lineReader.readLine();
if (line === "") {
// Found an empty line
this.#lineIndex++;
this.#zeroBasedLineIndex++;
return this.#pull(controller);
}
if (line === null) {
@ -200,7 +200,7 @@ export class CsvParseStream<
line,
this.#lineReader,
this.#options,
this.#lineIndex,
this.#zeroBasedLineIndex,
);
if (this.#isFirstRow) {
@ -233,18 +233,18 @@ export class CsvParseStream<
) {
throw new SyntaxError(
`record on line ${
this.#lineIndex + 1
this.#zeroBasedLineIndex + 1
}: expected ${this.#fieldsPerRecord} fields but got ${record.length}`,
);
}
this.#lineIndex++;
this.#zeroBasedLineIndex++;
if (record.length > 0) {
if (this.#options.skipFirstRow || this.#options.columns) {
controller.enqueue(convertRowToObject(
record,
this.#headers,
this.#lineIndex,
this.#zeroBasedLineIndex,
));
} else {
controller.enqueue(record);

View File

@ -45,7 +45,7 @@ Deno.test({
await assertRejects(
() => reader.read(),
SyntaxError,
`record on line 4; parse error on line 5, column 0: extraneous or missing " in quoted-field`,
`record on line 4; parse error on line 5, column 1: extraneous or missing " in quoted-field`,
);
},
});
@ -366,7 +366,7 @@ x,,,
error: {
klass: SyntaxError,
msg:
'record on line 1; parse error on line 0, column 2: bare " in non-quoted-field',
'record on line 1; parse error on line 1, column 3: bare " in non-quoted-field',
},
},
{
@ -375,7 +375,25 @@ x,,,
error: {
klass: SyntaxError,
msg:
'record on line 1; parse error on line 0, column 3: extraneous or missing " in quoted-field',
'record on line 1; parse error on line 1, column 4: extraneous or missing " in quoted-field',
},
},
{
name: "bad quote at line 1 in quoted field with newline",
input: `"w\n\no"rd",1,2,3`,
error: {
klass: SyntaxError,
msg:
'record on line 1; parse error on line 3, column 2: extraneous or missing " in quoted-field',
},
},
{
name: "bad quote at line 2 in quoted field with newline",
input: `a,b,c,d\n"w\n\no"rd",1,2,3`,
error: {
klass: SyntaxError,
msg:
'record on line 2; parse error on line 4, column 2: extraneous or missing " in quoted-field',
},
},
{

View File

@ -194,7 +194,7 @@ Deno.test({
assertThrows(
() => parse(input),
SyntaxError,
'parse error on line 1, column 1: bare " in non-quoted-field',
'parse error on line 1, column 2: bare " in non-quoted-field',
);
},
});
@ -205,7 +205,7 @@ Deno.test({
assertThrows(
() => parse(input),
SyntaxError,
'parse error on line 1, column 5: bare " in non-quoted-field',
'parse error on line 1, column 6: bare " in non-quoted-field',
);
},
});
@ -224,7 +224,7 @@ Deno.test({
assertThrows(
() => parse(input),
SyntaxError,
'parse error on line 1, column 2: bare " in non-quoted-field',
'parse error on line 1, column 3: bare " in non-quoted-field',
);
},
});
@ -235,7 +235,7 @@ Deno.test({
assertThrows(
() => parse(input),
SyntaxError,
'parse error on line 1, column 10: bare " in non-quoted-field',
'parse error on line 1, column 11: bare " in non-quoted-field',
);
},
});
@ -246,7 +246,7 @@ Deno.test({
assertThrows(
() => parse(input),
SyntaxError,
`parse error on line 1, column 3: extraneous or missing " in quoted-field`,
`parse error on line 1, column 4: extraneous or missing " in quoted-field`,
);
},
});
@ -381,22 +381,41 @@ Deno.test({
await t.step({
name: "StartLine1", // Issue 19019
fn() {
const input = 'a,"b\nc"d,e';
const input = `a,"b
c"d,e`;
assertThrows(
() => parse(input, { fieldsPerRecord: 2 }),
SyntaxError,
'record on line 1; parse error on line 2, column 1: extraneous or missing " in quoted-field',
'record on line 1; parse error on line 2, column 2: extraneous or missing " in quoted-field',
);
},
});
await t.step({
name: "StartLine2",
fn() {
const input = 'a,b\n"d\n\n,e';
const input = `a,b
"d
,e`;
assertThrows(
() => parse(input, { fieldsPerRecord: 2 }),
SyntaxError,
'record on line 2; parse error on line 5, column 0: extraneous or missing " in quoted-field',
'record on line 2; parse error on line 4, column 1: extraneous or missing " in quoted-field',
);
},
});
await t.step({
name: "ParseErrorLine",
fn() {
const input = `id,name
1,foo
2,"baz
`;
assertThrows(
() => parse(input),
SyntaxError,
'record on line 4; parse error on line 4, column 1: extraneous or missing " in quoted-field',
);
},
});
@ -439,7 +458,7 @@ Deno.test({
assertThrows(
() => parse(input, { fieldsPerRecord: 2 }),
SyntaxError,
'parse error on line 1, column 6: extraneous or missing " in quoted-field',
'parse error on line 1, column 7: extraneous or missing " in quoted-field',
);
},
});
@ -582,7 +601,7 @@ Deno.test({
assertThrows(
() => parse(input),
SyntaxError,
`parse error on line 1, column 4: extraneous or missing " in quoted-field`,
`parse error on line 1, column 5: extraneous or missing " in quoted-field`,
);
},
});
@ -617,7 +636,7 @@ Deno.test({
assertThrows(
() => parse(input),
SyntaxError,
`parse error on line 1, column 7: extraneous or missing " in quoted-field`,
`parse error on line 1, column 8: extraneous or missing " in quoted-field`,
);
},
});