mirror of
https://github.com/denoland/std.git
synced 2024-11-22 04:59:05 +00:00
165 lines
5.2 KiB
TypeScript
165 lines
5.2 KiB
TypeScript
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
|
|
import { toTransformStream } from "@std/streams/to-transform-stream";
|
|
import type { JsonValue, ParseStreamOptions } from "./common.ts";
|
|
import { parse } from "./_common.ts";
|
|
|
|
/**
 * Reports whether `char` is one of the four blank characters this module
 * skips between values: space, horizontal tab, carriage return or line feed.
 *
 * @param char A single character, or `undefined` (e.g. an out-of-range index).
 * @returns `true` only for `" "`, `"\t"`, `"\r"` or `"\n"`; `false` for
 * `undefined` and for every other string.
 */
function isBlankChar(char: string | undefined) {
  // Direct comparisons: this runs once per input character, so avoid
  // allocating a lookup array on every call.
  return char === " " || char === "\t" || char === "\r" || char === "\n";
}
|
|
|
|
/**
 * Lookup from the first letter of a JSON literal to the full literal:
 * `"n"` → `"null"`, `"t"` → `"true"`, `"f"` → `"false"`.
 */
const primitives = new Map(
  (["null", "true", "false"] as const).map((v) => [v[0], v]),
);
|
|
|
|
/**
 * Stream to parse {@link https://en.wikipedia.org/wiki/JSON_streaming#Concatenated_JSON|Concatenated JSON}.
 *
 * Text chunks written to {@linkcode ConcatenatedJsonParseStream.writable} are
 * scanned character by character; each complete top-level value is passed to
 * `parse` and the result is emitted on
 * {@linkcode ConcatenatedJsonParseStream.readable}. Values may be split across
 * chunks. Top-level numbers are terminated by the start of the next value or
 * by any blank character (space, tab, carriage return, line feed).
 *
 * @example
 * ```ts
 * import { ConcatenatedJsonParseStream } from "@std/json/concatenated-json-parse-stream";
 *
 * const url = "@std/json/testdata/test.concatenated-json";
 * const { body } = await fetch(url);
 *
 * const readable = body!
 *   .pipeThrough(new TextDecoderStream()) // convert Uint8Array to string
 *   .pipeThrough(new ConcatenatedJsonParseStream()); // parse Concatenated JSON
 *
 * for await (const data of readable) {
 *   console.log(data);
 * }
 * ```
 */
export class ConcatenatedJsonParseStream
  implements TransformStream<string, JsonValue> {
  /** The writable side: accepts chunks of Concatenated JSON text. */
  readonly writable: WritableStream<string>;
  /** The readable side: emits one parsed JSON value per top-level value. */
  readonly readable: ReadableStream<JsonValue>;

  /**
   * Constructs a new instance.
   *
   * @param options Optional queuing strategies; `writableStrategy` and
   * `readableStrategy` are forwarded unchanged to `toTransformStream`.
   */
  constructor({ writableStrategy, readableStrategy }: ParseStreamOptions = {}) {
    const { writable, readable } = toTransformStream(
      this.#concatenatedJSONIterator,
      writableStrategy,
      readableStrategy,
    );
    this.writable = writable;
    this.readable = readable;
  }

  /**
   * Splits the incoming text into top-level JSON values and yields each one
   * after running it through `parse`.
   *
   * The scanner does not validate JSON itself: it only tracks bracket nesting,
   * string/escape state and top-level literals to find value boundaries, and
   * leaves error reporting to `parse` (presumably a `SyntaxError` for
   * malformed text; confirm against `./_common.ts`).
   *
   * @param src The text chunks to scan, in order.
   */
  async *#concatenatedJSONIterator(src: AsyncIterable<string>) {
    // Counts the number of '{', '}', '[', ']', and when the nesting level
    // reaches 0, concatenates and returns the string.

    // Text of the current value carried over from earlier chunks.
    let targetString = "";
    // True once a non-blank character of the current value has been seen.
    let hasValue = false;
    let nestCount = 0;
    let readingString = false;
    let escapeNext = false;
    // The top-level literal currently being matched, if any.
    let readingPrimitive: false | "null" | "true" | "false" = false;
    let positionInPrimitive = 0;

    for await (const chunk of src) {
      // Index in `chunk` where the not-yet-emitted text begins.
      let sliceStart = 0;
      for (let i = 0; i < chunk.length; i++) {
        const char = chunk[i];

        // We're reading a primitive at the top level
        if (readingPrimitive) {
          if (char === readingPrimitive[positionInPrimitive]) {
            positionInPrimitive++;

            // Emit the primitive when done reading
            if (positionInPrimitive === readingPrimitive.length) {
              yield parse(targetString + chunk.slice(sliceStart, i + 1));
              hasValue = false;
              readingPrimitive = false;
              positionInPrimitive = 0;
              targetString = "";
              sliceStart = i + 1;
            }
          } else {
            // If the primitive is malformed, keep reading, maybe the next
            // characters can be useful in the syntax error.
            readingPrimitive = false;
            positionInPrimitive = 0;
          }
          continue;
        }

        if (readingString) {
          if (char === '"' && !escapeNext) {
            readingString = false;

            // When the nesting level is 0, it returns a string when '"' comes.
            if (nestCount === 0 && hasValue) {
              yield parse(targetString + chunk.slice(sliceStart, i + 1));
              hasValue = false;
              targetString = "";
              sliceStart = i + 1;
            }
          }
          // A backslash escapes the next character unless it is itself escaped.
          escapeNext = !escapeNext && char === "\\";
          continue;
        }

        // Parses number with a nesting level of 0.
        // example: '0["foo"]' => 0, ["foo"]
        // example: '3.14{"foo": "bar"}' => 3.14, {foo: "bar"}
        // example: '1\n2' => 1, 2
        // Any blank character ends the number, not just a space, so that
        // tab- or newline-separated numbers are split the same way.
        if (
          hasValue && nestCount === 0 &&
          (char === "{" || char === "[" || char === '"' ||
            char === "n" || char === "t" || char === "f" ||
            isBlankChar(char))
        ) {
          yield parse(targetString + chunk.slice(sliceStart, i));
          hasValue = false;
          targetString = "";
          sliceStart = i;
          // Re-process the terminating character as the start of the next value.
          i--;
          continue;
        }

        switch (char) {
          case '"':
            readingString = true;
            escapeNext = false;
            break;
          case "{":
          case "[":
            nestCount++;
            break;
          case "}":
          case "]":
            nestCount--;
            break;
        }

        if (nestCount === 0 && primitives.has(char)) {
          // The first letter of a primitive at top level was found
          readingPrimitive = primitives.get(char)!;
          positionInPrimitive = 1;
        }

        // parse object or array
        if (
          hasValue && nestCount === 0 &&
          (char === "}" || char === "]")
        ) {
          yield parse(targetString + chunk.slice(sliceStart, i + 1));
          hasValue = false;
          targetString = "";
          sliceStart = i + 1;
          continue;
        }

        if (!hasValue && !isBlankChar(char)) {
          // We want to ignore the character string with only blank, so if
          // there is a character other than blank, record it.
          hasValue = true;
        }
      }
      // Keep the unfinished tail so the value can continue in the next chunk.
      targetString += chunk.slice(sliceStart);
    }

    // Flush whatever is left (e.g. a trailing number, or incomplete text that
    // `parse` will reject).
    if (hasValue) {
      yield parse(targetString);
    }
  }
}
|