// Originally ported from Go:
// https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/
// Copyright 2011 The Go Authors. All rights reserved. BSD license.
// https://github.com/golang/go/blob/master/LICENSE
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

import { codePointLength } from "./_shared.ts";
/** Options for {@linkcode parseRecord}. */
export interface ReadOptions {
  /**
   * Character which separates values.
   *
   * @default {","}
   */
  separator?: string;
  /**
   * Character to start a comment.
   *
   * Lines beginning with the comment character without preceding whitespace
   * are ignored. With leading whitespace the comment character becomes part of
   * the field, even if you provide `trimLeadingSpace: true`.
   *
   * `defaultReadOptions` does not set this option, and `parseRecord` treats
   * no line as a comment while it is unset or empty.
   */
  comment?: string;
  /**
   * Flag to trim the leading space of the value.
   *
   * This is done even if the field delimiter, `separator`, is white space.
   *
   * @default {false}
   */
  trimLeadingSpace?: boolean;
  /**
   * Allow unquoted quote in a quoted field or non-double-quoted quotes in
   * quoted field.
   *
   * @default {false}
   */
  lazyQuotes?: boolean;
  /**
   * Enables checking the number of expected fields for each row.
   *
   * If positive, each record is required to have the given number of fields.
   * If === 0, it will be set to the number of fields in the first row, so that
   * future rows must have the same field count.
   * If negative, no check is made and records may have a variable number of
   * fields.
   *
   * If the wrong number of fields is in a row, a `ParseError` is thrown.
   *
   * NOTE(review): `parseRecord` never reads this option; the check is
   * presumably performed by its callers — confirm.
   */
  fieldsPerRecord?: number;
}

/**
 * Baseline option values: fields are separated by a comma and leading
 * whitespace is kept. No comment character, `lazyQuotes` or
 * `fieldsPerRecord` value is set here.
 */
export const defaultReadOptions: ReadOptions = {
  separator: ",",
  trimLeadingSpace: false,
};

/**
 * Source of input lines for {@linkcode parseRecord}, which pulls further
 * lines from it when a quoted field continues past the end of the current
 * line.
 */
export interface LineReader {
  /**
   * Resolves with the next line, or with `null` when no line is available.
   * `parseRecord` treats `null` as an abrupt end of input and appends a
   * `"\n"` after each non-null line it reads, so lines are expected without
   * their trailing line feed.
   */
  readLine(): Promise<string | null>;
  /**
   * Whether the input is exhausted. `parseRecord` consults this only when the
   * current line is empty inside an unterminated quoted field, to decide
   * whether another `readLine()` call is worthwhile.
   */
  isEOF(): boolean;
}

/**
 * Parses a single CSV record into its fields.
 *
 * Parsing starts on `fullLine`. When a quoted field is still open at the end
 * of the current line, further lines are pulled from `reader` and joined with
 * `"\n"` until the closing quote is found or the reader returns `null`.
 *
 * @param fullLine First line of the record, without its line terminator.
 * @param reader Supplies continuation lines for multi-line quoted fields.
 * @param options Parsing options. `separator` must be defined.
 * @param startLine Line on which the record starts. It is reported as
 * `startLine + 1` in thrown errors.
 * @param lineIndex Line currently being parsed. It is incremented for every
 * extra line read from `reader` and reported unchanged in thrown errors.
 * NOTE(review): because `startLine` is offset by one and `lineIndex` is not,
 * callers presumably pass values that already account for this — confirm.
 * @returns The fields of the record, or an empty array when `fullLine` starts
 * with the configured comment character.
 * @throws {TypeError} If `options.separator` is `undefined`.
 * @throws {ParseError} With `ERR_BARE_QUOTE` when a quote appears inside a
 * non-quoted field, or with `ERR_QUOTE` when a quoted field contains a stray
 * quote or is never closed. Neither is thrown when `options.lazyQuotes` is
 * set.
 */
export async function parseRecord(
  fullLine: string,
  reader: LineReader,
  options: ReadOptions,
  startLine: number,
  lineIndex: number = startLine,
): Promise<Array<string>> {
  // A line starting with the comment character is ignored. `startsWith`
  // compares the whole string, so a comment character outside the BMP (two
  // UTF-16 code units) is recognized too; indexing `fullLine[0]` would only
  // ever see its first code unit. The check runs on the untrimmed line, which
  // is why leading whitespace disables it.
  if (options.comment && fullLine.startsWith(options.comment)) {
    return [];
  }

  if (options.separator === undefined) {
    throw new TypeError("Separator is required");
  }

  let line = fullLine;
  const quote = '"';
  const quoteLen = quote.length;
  const separatorLen = options.separator.length;
  // All field contents are appended to one buffer; `fieldIndexes` records the
  // buffer length at the end of each field so the buffer can be cut up below.
  let recordBuffer = "";
  const fieldIndexes: number[] = [];
  parseField: while (true) {
    if (options.trimLeadingSpace) {
      line = line.trimStart();
    }

    if (line.length === 0 || !line.startsWith(quote)) {
      // Non-quoted string field
      const i = line.indexOf(options.separator);
      let field = line;
      if (i >= 0) {
        field = field.substring(0, i);
      }
      // Check to make sure a quote does not appear in field.
      if (!options.lazyQuotes) {
        const j = field.indexOf(quote);
        if (j >= 0) {
          // Column of the offending quote, measured from the start of the
          // full line rather than from the start of this field.
          const col = codePointLength(
            fullLine.slice(0, fullLine.length - line.slice(j).length),
          );
          throw new ParseError(startLine + 1, lineIndex, col, ERR_BARE_QUOTE);
        }
      }
      recordBuffer += field;
      fieldIndexes.push(recordBuffer.length);
      if (i >= 0) {
        line = line.substring(i + separatorLen);
        continue parseField;
      }
      break parseField;
    } else {
      // Quoted string field
      line = line.substring(quoteLen);
      while (true) {
        const i = line.indexOf(quote);
        if (i >= 0) {
          // Hit next quote.
          recordBuffer += line.substring(0, i);
          line = line.substring(i + quoteLen);
          if (line.startsWith(quote)) {
            // `""` sequence (append quote).
            recordBuffer += quote;
            line = line.substring(quoteLen);
          } else if (line.startsWith(options.separator)) {
            // `","` sequence (end of field).
            line = line.substring(separatorLen);
            fieldIndexes.push(recordBuffer.length);
            continue parseField;
          } else if (0 === line.length) {
            // `"\n` sequence (end of line).
            fieldIndexes.push(recordBuffer.length);
            break parseField;
          } else if (options.lazyQuotes) {
            // `"` sequence (bare quote).
            recordBuffer += quote;
          } else {
            // `"*` sequence (invalid non-escaped quote).
            const col = codePointLength(
              fullLine.slice(0, fullLine.length - line.length - quoteLen),
            );
            throw new ParseError(startLine + 1, lineIndex, col, ERR_QUOTE);
          }
        } else if (line.length > 0 || !reader.isEOF()) {
          // Hit end of line (copy all data so far).
          recordBuffer += line;
          const r = await reader.readLine();
          lineIndex++;
          line = r ?? ""; // This is a workaround for making this module behave similarly to the encoding/csv/reader.go.
          // Columns in later errors are measured against the line just read.
          fullLine = line;
          if (r === null) {
            // Abrupt end of file (EOF or error).
            if (!options.lazyQuotes) {
              const col = codePointLength(fullLine);
              throw new ParseError(startLine + 1, lineIndex, col, ERR_QUOTE);
            }
            fieldIndexes.push(recordBuffer.length);
            break parseField;
          }
          recordBuffer += "\n"; // preserve line feed (This is because TextProtoReader removes it.)
        } else {
          // Abrupt end of file (EOF on error).
          if (!options.lazyQuotes) {
            const col = codePointLength(fullLine);
            throw new ParseError(startLine + 1, lineIndex, col, ERR_QUOTE);
          }
          fieldIndexes.push(recordBuffer.length);
          break parseField;
        }
      }
    }
  }
  // Cut the buffer into fields at the recorded end offsets.
  const result: string[] = [];
  let preIdx = 0;
  for (const i of fieldIndexes) {
    result.push(recordBuffer.slice(preIdx, i));
    preIdx = i;
  }
  return result;
}

/**
 * A ParseError is thrown for parsing errors.
 * Line numbers are 1-indexed and columns are 0-indexed.
 *
 * @example Usage
 * ```ts
 * import { parse, ParseError } from "@std/csv/parse";
 * import { assertEquals } from "@std/assert";
 *
 * try {
 *   parse(`a "word","b"`);
 * } catch (error) {
 *   if (error instanceof ParseError) {
 *     assertEquals(error.message, `parse error on line 1, column 2: bare " in non-quoted-field`);
 *   }
 * }
 * ```
 */
export class ParseError extends SyntaxError {
  /**
   * Line where the record starts.
   *
   * @example Usage
   * ```ts
   * import { parse, ParseError } from "@std/csv/parse";
   * import { assertEquals } from "@std/assert";
   *
   * try {
   *   parse(`a "word","b"`);
   * } catch (error) {
   *   if (error instanceof ParseError) {
   *     assertEquals(error.startLine, 1);
   *   }
   * }
   * ```
   */
  startLine: number;
  /**
   * Line where the error occurred.
   *
   * @example Usage
   * ```ts
   * import { parse, ParseError } from "@std/csv/parse";
   * import { assertEquals } from "@std/assert";
   *
   * try {
   *   parse(`a "word","b"`);
   * } catch (error) {
   *   if (error instanceof ParseError) {
   *     assertEquals(error.line, 1);
   *   }
   * }
   * ```
   */
  line: number;
  /**
   * Column (rune index) where the error occurred, or `null` when the caller
   * supplied no column.
   *
   * @example Usage
   * ```ts
   * import { parse, ParseError } from "@std/csv/parse";
   * import { assertEquals } from "@std/assert";
   *
   * try {
   *   parse(`a "word","b"`);
   * } catch (error) {
   *   if (error instanceof ParseError) {
   *     assertEquals(error.column, 2);
   *   }
   * }
   * ```
   */
  column: number | null;

  /**
   * Constructs a new instance.
   *
   * The final `message` property is derived from the arguments:
   * - for `ERR_FIELD_COUNT`: `record on line <line>: <message>`;
   * - when `start` and `line` differ:
   *   `record on line <start>; parse error on line <line>, column <column>: <message>`;
   * - otherwise: `parse error on line <line>, column <column>: <message>`.
   *
   * @example Usage
   * ```ts
   * import { parse, ParseError } from "@std/csv/parse";
   * import { assertEquals } from "@std/assert";
   *
   * try {
   *   parse(`a "word","b"`);
   * } catch (error) {
   *   if (error instanceof ParseError) {
   *     assertEquals(error.message, `parse error on line 1, column 2: bare " in non-quoted-field`);
   *   }
   * }
   * ```
   *
   * @param start Line where the record starts
   * @param line Line where the error occurred
   * @param column Column index where the error occurred, or `null`
   * @param message Error message
   */
  constructor(
    start: number,
    line: number,
    column: number | null,
    message: string,
  ) {
    // The message is assigned below because its final text depends on the
    // other arguments.
    super();
    this.startLine = start;
    this.column = column;
    this.line = line;

    if (message === ERR_FIELD_COUNT) {
      // Field-count errors concern the whole record, so no column is shown.
      this.message = `record on line ${line}: ${message}`;
    } else if (start !== line) {
      this.message =
        `record on line ${start}; parse error on line ${line}, column ${column}: ${message}`;
    } else {
      this.message =
        `parse error on line ${line}, column ${column}: ${message}`;
    }
  }
}

/** Message used by `parseRecord` when a quote appears inside a non-quoted field. */
export const ERR_BARE_QUOTE = 'bare " in non-quoted-field';
/**
 * Message used by `parseRecord` when a quoted field contains a stray quote or
 * is still open when the input ends.
 */
export const ERR_QUOTE = 'extraneous or missing " in quoted-field';
/** Message for an invalid delimiter. Not referenced elsewhere in this file. */
export const ERR_INVALID_DELIM = "Invalid Delimiter";
/**
 * Message for a record with an unexpected number of fields. `ParseError`
 * formats this message without a column.
 */
export const ERR_FIELD_COUNT = "wrong number of fields";

/**
 * Builds an object from one parsed row, using `headers` as the property names.
 *
 * @param row Field values of the row.
 * @param headers Property names; `headers[i]` names `row[i]`. If a name
 * occurs more than once, the value of its last occurrence wins.
 * @param index Line identifier of the row; it is only used in the error
 * message.
 * @returns An object mapping each header to the value at the same position.
 * @throws {Error} If `row` and `headers` differ in length.
 */
export function convertRowToObject(
  row: string[],
  headers: readonly string[],
  index: number,
): Record<string, unknown> {
  if (row.length !== headers.length) {
    // "found" is what the row actually contains; "expected" is dictated by
    // the headers.
    throw new Error(
      `Error number of fields line: ${index}\nNumber of fields found: ${row.length}\nExpected number of fields: ${headers.length}`,
    );
  }
  const out: Record<string, unknown> = {};
  for (const [columnIndex, header] of headers.entries()) {
    out[header] = row[columnIndex];
  }
  return out;
}

/**
 * Parse result type for {@linkcode parse} and {@linkcode CsvParseStream}.
 *
 * `ParseOptions` is the full options type and `T` is the type of the options
 * value actually passed; the branches below are tried in order.
 */
export type ParseResult<ParseOptions, T> =
  // If the `columns` option is specified, the result is an array of records
  // keyed by the column names.
  T extends ParseOptions & { columns: readonly (infer C extends string)[] }
    ? RecordWithColumn<C>[]
    // If the `skipFirstRow` option is `true`, the result is an array of records.
    : T extends ParseOptions & { skipFirstRow: true }
      ? Record<string, string | undefined>[]
    // If neither `columns` nor `skipFirstRow` is specified, the result is string[][].
    : T extends
      ParseOptions & { columns?: undefined; skipFirstRow?: false | undefined }
      ? string[][]
    // Otherwise the result is either an array of records or string[][].
    : Record<string, string | undefined>[] | string[][];

/**
 * Record type with column type.
 *
 * When `C` is a union of string literals, every column is a required `string`
 * property. When `C` is the wide `string` type, the keys are unknown, so every
 * property may be `undefined`.
 *
 * @example
 * ```
 * RecordWithColumn<"aaa" | "bbb"> resolves to Record<"aaa" | "bbb", string>
 * RecordWithColumn<string>        resolves to Record<string, string | undefined>
 * ```
 */
export type RecordWithColumn<C extends string> = string extends C
  ? Record<string, string | undefined>
  : Record<C, string>;