2024-01-01 21:11:32 +00:00
|
|
|
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
|
2023-03-18 12:36:00 +00:00
|
|
|
// This module is browser compatible.
|
2022-11-29 13:55:38 +00:00
|
|
|
|
|
|
|
import { createLPS } from "./_common.ts";
|
|
|
|
|
2023-03-29 22:21:00 +00:00
|
|
|
import type {
|
|
|
|
DelimiterDisposition,
|
|
|
|
DelimiterStreamOptions,
|
|
|
|
} from "./delimiter_stream.ts";
|
|
|
|
|
2023-12-04 06:12:52 +00:00
|
|
|
/**
 * Transform a stream of `string` into a stream where each chunk is divided by
 * a given delimiter.
 *
 * Input chunks are accumulated in an internal buffer, so a delimiter that is
 * split across two or more incoming chunks is still recognized. When the
 * stream is closed, whatever remains in the buffer (possibly the empty string)
 * is emitted as the final chunk.
 *
 * Matching is performed on UTF-16 code units, so delimiters containing
 * non-ASCII characters are handled the same way as ASCII ones.
 *
 * If you are working with a stream of `Uint8Array`, consider using {@linkcode DelimiterStream}.
 *
 * If you want to split by a newline, consider using {@linkcode TextLineStream}.
 *
 * @example Comma-separated values
 * ```ts
 * import { TextDelimiterStream } from "@std/streams/text-delimiter-stream";
 * import { assertEquals } from "@std/assert";
 *
 * const stream = ReadableStream.from([
 *   "alice,20,",
 *   ",US,",
 * ]);
 *
 * const valueStream = stream.pipeThrough(new TextDelimiterStream(","));
 *
 * assertEquals(
 *   await Array.fromAsync(valueStream),
 *   ["alice", "20", "", "US", ""],
 * );
 * ```
 *
 * @example Semicolon-separated values with suffix disposition
 * ```ts
 * import { TextDelimiterStream } from "@std/streams/text-delimiter-stream";
 * import { assertEquals } from "@std/assert";
 *
 * const stream = ReadableStream.from([
 *   "const a = 42;;let b =",
 *   " true;",
 * ]);
 *
 * const valueStream = stream.pipeThrough(
 *   new TextDelimiterStream(";", { disposition: "suffix" }),
 * );
 *
 * assertEquals(
 *   await Array.fromAsync(valueStream),
 *   ["const a = 42;", ";", "let b = true;", ""],
 * );
 * ```
 */
export class TextDelimiterStream extends TransformStream<string, string> {
  /** Text received so far that has not yet been emitted. */
  #buf = "";
  /** The delimiter to split on. */
  #delimiter: string;
  /** Index into `#buf` of the next code unit to inspect. */
  #inspectIndex = 0;
  /** Number of leading delimiter code units matched so far. */
  #matchIndex = 0;
  /**
   * KMP failure table for the delimiter, indexed by UTF-16 code unit so that
   * it lines up with `#matchIndex`.
   */
  #delimLPS: Uint32Array;
  /** What to do with a matched delimiter: discard it or keep it as suffix/prefix. */
  #disp: DelimiterDisposition;

  /**
   * Constructs a new instance.
   *
   * @param delimiter A delimiter to split the stream by.
   * @param options Options for the stream. When `disposition` is omitted,
   * matched delimiters are discarded.
   */
  constructor(
    delimiter: string,
    options?: DelimiterStreamOptions,
  ) {
    super({
      transform: (chunk, controller) => {
        this.#handle(chunk, controller);
      },
      flush: (controller) => {
        // Emit the unterminated tail, even when it is empty.
        controller.enqueue(this.#buf);
      },
    });

    this.#delimiter = delimiter;
    // The table must be built over the same units that `#handle` compares
    // (UTF-16 code units). Building it over the UTF-8 encoding misaligns the
    // fallback positions for any delimiter containing non-ASCII characters.
    this.#delimLPS = TextDelimiterStream.#buildLPS(delimiter);
    this.#disp = options?.disposition ?? "discard";
  }

  /**
   * Builds the "longest proper prefix that is also a suffix" table used by
   * the Knuth-Morris-Pratt search, over the code units of `pattern`.
   *
   * @param pattern The delimiter to analyze.
   * @returns A table where entry `i` is the length of the longest proper
   * prefix of `pattern.slice(0, i + 1)` that is also a suffix of it.
   */
  static #buildLPS(pattern: string): Uint32Array {
    const lps = new Uint32Array(pattern.length);
    let prefixLength = 0;
    let i = 1;
    while (i < pattern.length) {
      if (pattern[i] === pattern[prefixLength]) {
        lps[i++] = ++prefixLength;
      } else if (prefixLength > 0) {
        // Retry with the next shorter candidate prefix.
        prefixLength = lps[prefixLength - 1]!;
      } else {
        lps[i++] = 0;
      }
    }
    return lps;
  }

  /**
   * Appends `chunk` to the buffer and emits one output chunk for every
   * complete delimiter found in the newly available text.
   *
   * @param chunk The incoming piece of text.
   * @param controller The controller used to enqueue output chunks.
   */
  #handle(
    chunk: string,
    controller: TransformStreamDefaultController<string>,
  ) {
    this.#buf += chunk;
    const delimiterLength = this.#delimiter.length;

    while (this.#inspectIndex < this.#buf.length) {
      if (this.#buf[this.#inspectIndex] === this.#delimiter[this.#matchIndex]) {
        this.#inspectIndex++;
        this.#matchIndex++;
        if (this.#matchIndex === delimiterLength) {
          // Full match
          const start = this.#inspectIndex - delimiterLength;
          // "suffix" keeps the delimiter at the end of the emitted chunk.
          const end = this.#disp === "suffix" ? this.#inspectIndex : start;
          controller.enqueue(this.#buf.slice(0, end));
          // "prefix" keeps the delimiter at the start of the remaining buffer,
          // so inspection resumes right after it.
          const keepAsPrefix = this.#disp === "prefix";
          this.#buf = this.#buf.slice(keepAsPrefix ? start : this.#inspectIndex);
          this.#inspectIndex = keepAsPrefix ? delimiterLength : 0;
          this.#matchIndex = 0;
        }
      } else if (this.#matchIndex === 0) {
        // No partial match in progress: move on to the next code unit.
        this.#inspectIndex++;
      } else {
        // Partial match failed: fall back without re-reading the buffer.
        this.#matchIndex = this.#delimLPS[this.#matchIndex - 1]!;
      }
    }
  }
}
|