2024-01-01 21:11:32 +00:00
|
|
|
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
|
2024-04-29 02:57:30 +00:00
|
|
|
import { toTransformStream } from "@std/streams/to-transform-stream";
|
2024-06-21 05:56:11 +00:00
|
|
|
import type { JsonValue } from "./types.ts";
|
2023-03-13 05:56:53 +00:00
|
|
|
import { parse } from "./_common.ts";
|
2022-06-20 00:06:37 +00:00
|
|
|
|
2024-01-11 00:58:48 +00:00
|
|
|
/**
 * Reports whether `char` is a blank character: space, horizontal tab,
 * carriage return or line feed.
 *
 * @param char A single character, or `undefined` (for example when indexing
 * past the end of a string).
 * @returns `true` only when `char` is exactly one of the four blank
 * characters; `false` for `undefined`, the empty string and anything else.
 */
function isBlankChar(char: string | undefined): boolean {
  // Plain comparisons avoid allocating a lookup array on every call; this
  // predicate runs once per input character.
  return char === " " || char === "\t" || char === "\r" || char === "\n";
}
|
|
|
|
|
2023-06-27 01:32:01 +00:00
|
|
|
/**
 * Lookup from the first letter of a JSON literal (`n`, `t`, `f`) to the full
 * literal text (`null`, `true`, `false`).
 */
const primitives = new Map<string, "null" | "true" | "false">(
  (["null", "true", "false"] as const).map(
    (literal) => [literal.charAt(0), literal] as const,
  ),
);
|
|
|
|
|
2022-06-20 00:06:37 +00:00
|
|
|
/**
 * Stream to parse
 * {@link https://en.wikipedia.org/wiki/JSON_streaming#Concatenated_JSON | Concatenated JSON}.
 *
 * String chunks written to {@linkcode ConcatenatedJsonParseStream.writable}
 * are scanned character by character; each complete top-level JSON value is
 * handed to the imported `parse` helper and the result is emitted on
 * {@linkcode ConcatenatedJsonParseStream.readable}. A value may span several
 * chunks, and one chunk may hold several values.
 *
 * NOTE(review): invalid input is passed to `parse` unchanged; it presumably
 * throws a `SyntaxError` that errors the stream — confirm in `./_common.ts`.
 *
 * @example Usage
 *
 * ```ts
 * import { ConcatenatedJsonParseStream } from "@std/json/concatenated-json-parse-stream";
 * import { assertEquals } from "@std/assert";
 *
 * const stream = ReadableStream.from([
 *   `{"foo":"bar"}`,
 *   `{"baz":100}`,
 * ]).pipeThrough(new ConcatenatedJsonParseStream());
 *
 * assertEquals(await Array.fromAsync(stream), [
 *   { foo: "bar" },
 *   { baz: 100 },
 * ]);
 * ```
 */
export class ConcatenatedJsonParseStream
  implements TransformStream<string, JsonValue> {
  /**
   * A writable stream that accepts string chunks of concatenated JSON text.
   *
   * @example Usage
   * ```ts
   * import { ConcatenatedJsonParseStream } from "@std/json/concatenated-json-parse-stream";
   * import { assertEquals } from "@std/assert";
   *
   * const stream = ReadableStream.from([
   *   `{"foo":"bar"}`,
   *   `{"baz":100}`,
   * ]).pipeThrough(new ConcatenatedJsonParseStream());
   *
   * assertEquals(await Array.fromAsync(stream), [
   *   { foo: "bar" },
   *   { baz: 100 },
   * ]);
   * ```
   */
  readonly writable: WritableStream<string>;
  /**
   * A readable stream that emits one parsed JSON value per top-level value
   * found in the written text.
   *
   * @example Usage
   * ```ts
   * import { ConcatenatedJsonParseStream } from "@std/json/concatenated-json-parse-stream";
   * import { assertEquals } from "@std/assert";
   *
   * const stream = ReadableStream.from([
   *   `{"foo":"bar"}`,
   *   `{"baz":100}`,
   * ]).pipeThrough(new ConcatenatedJsonParseStream());
   *
   * assertEquals(await Array.fromAsync(stream), [
   *   { foo: "bar" },
   *   { baz: 100 },
   * ]);
   * ```
   */
  readonly readable: ReadableStream<JsonValue>;

  /**
   * Constructs a new instance.
   */
  constructor() {
    const { writable, readable } = toTransformStream(
      this.#concatenatedJSONIterator,
    );
    this.writable = writable;
    this.readable = readable;
  }

  /**
   * Splits the incoming text into top-level JSON values and yields each one
   * after parsing it.
   *
   * @param src The string chunks to scan, in order.
   */
  async *#concatenatedJSONIterator(src: AsyncIterable<string>) {
    // Counts the number of '{', '}', '[', ']', and when the nesting level reaches 0, concatenates and returns the string.
    // Text of the current value carried over from earlier chunks.
    let targetString = "";
    // Whether a non-blank character of a not-yet-emitted value has been seen.
    let hasValue = false;
    let nestCount = 0;
    let readingString = false;
    let escapeNext = false;
    let readingPrimitive: false | "null" | "true" | "false" = false;
    let positionInPrimitive = 0;
    for await (const string of src) {
      // Index in this chunk where the not-yet-emitted text begins.
      let sliceStart = 0;
      for (let i = 0; i < string.length; i++) {
        const char = string[i];

        // We're reading a primitive at the top level
        if (readingPrimitive) {
          if (char === readingPrimitive[positionInPrimitive]) {
            positionInPrimitive++;

            // Emit the primitive when done reading
            if (positionInPrimitive === readingPrimitive.length) {
              yield parse(targetString + string.slice(sliceStart, i + 1));
              hasValue = false;
              readingPrimitive = false;
              positionInPrimitive = 0;
              targetString = "";
              sliceStart = i + 1;
            }
          } else {
            // If the primitive is malformed, keep reading, maybe the next characters can be useful in the syntax error.
            readingPrimitive = false;
            positionInPrimitive = 0;
          }
          continue;
        }

        if (readingString) {
          if (char === '"' && !escapeNext) {
            readingString = false;

            // When the nesting level is 0, it returns a string when '"' comes.
            if (nestCount === 0 && hasValue) {
              yield parse(targetString + string.slice(sliceStart, i + 1));
              hasValue = false;
              targetString = "";
              sliceStart = i + 1;
            }
          }
          // A backslash escapes the next character unless it is itself escaped.
          escapeNext = !escapeNext && char === "\\";
          continue;
        }

        // Parses number with a nesting level of 0.
        // A bare number has no closing delimiter, so it ends at the first
        // blank character (space, tab, CR or LF) or at the first character
        // that opens the next value.
        // example: '0["foo"]' => 0, ["foo"]
        // example: '3.14{"foo": "bar"}' => 3.14, {foo: "bar"}
        // example: '1\n2' => 1, 2
        if (
          hasValue && nestCount === 0 &&
          (char === "{" || char === "[" || char === '"' || isBlankChar(char) ||
            char === "n" || char === "t" || char === "f")
        ) {
          yield parse(targetString + string.slice(sliceStart, i));
          hasValue = false;
          targetString = "";
          sliceStart = i;
          // Re-process the delimiter itself as the start of the next value.
          i--;
          continue;
        }

        switch (char) {
          case '"':
            readingString = true;
            escapeNext = false;
            break;
          case "{":
          case "[":
            nestCount++;
            break;
          case "}":
          case "]":
            nestCount--;
            break;
        }

        if (nestCount === 0 && primitives.has(char)) {
          // The first letter of a primitive at top level was found
          readingPrimitive = primitives.get(char)!;
          positionInPrimitive = 1;
        }

        // parse object or array
        if (
          hasValue && nestCount === 0 &&
          (char === "}" || char === "]")
        ) {
          yield parse(targetString + string.slice(sliceStart, i + 1));
          hasValue = false;
          targetString = "";
          sliceStart = i + 1;
          continue;
        }

        if (!hasValue && !isBlankChar(char)) {
          // We want to ignore the character string with only blank, so if there is a character other than blank, record it.
          hasValue = true;
        }
      }
      // Keep the unfinished tail of this chunk for the next one.
      targetString += string.slice(sliceStart);
    }
    // Emit whatever is left once the source ends (e.g. a trailing number).
    if (hasValue) {
      yield parse(targetString);
    }
  }
}
|