std/streams/delimiter.ts

220 lines
6.0 KiB
TypeScript

// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.
// This module is browser compatible.
import { BytesList } from "../bytes/bytes_list.ts";
interface TextLineStreamOptions {
  /** Allow splitting by solo \r. Defaults to `false`. */
  allowCR?: boolean;
}

/** Transform a stream into a stream where each chunk is divided by a newline,
 * be it `\n` or `\r\n`. `\r` can be enabled via the `allowCR` option.
 *
 * Line terminators are not included in the emitted chunks. Text that follows
 * the last terminator is emitted as a final line when the stream closes; if
 * nothing follows the last terminator, no extra empty line is emitted.
 *
 * ```ts
 * import { TextLineStream } from "./delimiter.ts";
 * const res = await fetch("https://example.com");
 * const lines = res.body!
 *   .pipeThrough(new TextDecoderStream())
 *   .pipeThrough(new TextLineStream());
 * ```
 */
export class TextLineStream extends TransformStream<string, string> {
  readonly #allowCR: boolean;
  /** Text received so far that is not yet known to form a complete line. */
  #buf = "";

  constructor(options?: TextLineStreamOptions) {
    super({
      transform: (chunk, controller) => this.#handle(chunk, controller),
      flush: (controller) => this.#flush(controller),
    });
    this.#allowCR = options?.allowCR ?? false;
  }

  /** Emit every complete line in `#buf + chunk` and buffer the remainder. */
  #handle(chunk: string, controller: TransformStreamDefaultController<string>) {
    chunk = this.#buf + chunk;
    for (;;) {
      const lfIndex = chunk.indexOf("\n");
      if (this.#allowCR) {
        const crIndex = chunk.indexOf("\r");
        // A lone CR ends a line only when it is not the last buffered
        // character (its LF may still arrive with the next chunk) and it sits
        // before the first LF without being directly followed by it (that
        // would be a CRLF, which is handled below).
        if (
          crIndex !== -1 && crIndex !== (chunk.length - 1) &&
          (lfIndex === -1 || (lfIndex - 1) > crIndex)
        ) {
          controller.enqueue(chunk.slice(0, crIndex));
          chunk = chunk.slice(crIndex + 1);
          continue;
        }
      }
      if (lfIndex !== -1) {
        // Drop the CR of a CRLF pair from the emitted line.
        const lineEnd = chunk[lfIndex - 1] === "\r" ? lfIndex - 1 : lfIndex;
        controller.enqueue(chunk.slice(0, lineEnd));
        chunk = chunk.slice(lfIndex + 1);
        continue;
      }
      break;
    }
    this.#buf = chunk;
  }

  /**
   * Emit whatever is still buffered when the stream closes.
   *
   * Nothing is emitted for an empty buffer, so input that ends with a line
   * terminator (or an empty stream) does not produce a trailing empty line.
   * With `allowCR`, a buffered trailing `\r` is a terminator and is removed.
   */
  #flush(controller: TransformStreamDefaultController<string>) {
    if (this.#buf.length === 0) return;
    const line = this.#allowCR && this.#buf.endsWith("\r")
      ? this.#buf.slice(0, -1)
      : this.#buf;
    this.#buf = "";
    controller.enqueue(line);
  }
}
/** Split a byte stream on a fixed byte sequence: every emitted chunk is the
 * data found between two occurrences of the delimiter (the delimiter itself is
 * dropped). Whatever remains buffered when the stream closes is emitted as the
 * final chunk.
 *
 * ```ts
 * import { DelimiterStream } from "./delimiter.ts";
 * const res = await fetch("https://example.com");
 * const parts = res.body!
 *   .pipeThrough(new DelimiterStream(new TextEncoder().encode("foo")))
 *   .pipeThrough(new TextDecoderStream());
 * ```
 */
export class DelimiterStream extends TransformStream<Uint8Array, Uint8Array> {
  /** Bytes received but not yet emitted. */
  #bufs = new BytesList();
  #delimiter: Uint8Array;
  /** Number of buffered bytes that have already been examined. */
  #inspectIndex = 0;
  /** Number of leading delimiter bytes matched so far. */
  #matchIndex = 0;
  #delimLen: number;
  /** Fallback table produced by `createLPS` for the delimiter. */
  #delimLPS: Uint8Array;

  constructor(delimiter: Uint8Array) {
    super({
      transform: (chunk, controller) => {
        this.#handle(chunk, controller);
      },
      flush: (controller) => {
        controller.enqueue(this.#bufs.concat());
      },
    });
    this.#delimiter = delimiter;
    this.#delimLen = delimiter.length;
    this.#delimLPS = createLPS(delimiter);
  }

  /**
   * Append `chunk` to the buffer and scan it for the delimiter, carrying the
   * partial-match state over from previous chunks.
   */
  #handle(
    chunk: Uint8Array,
    controller: TransformStreamDefaultController<Uint8Array>,
  ) {
    this.#bufs.add(chunk);
    // Cursor inside `chunk`; it advances in lockstep with #inspectIndex,
    // which is the same cursor counted from the start of the buffered data.
    let pos = 0;
    while (this.#inspectIndex < this.#bufs.size()) {
      const sameByte = chunk[pos] === this.#delimiter[this.#matchIndex];

      if (!sameByte) {
        if (this.#matchIndex !== 0) {
          // Retry the same byte against a shorter prefix of the delimiter.
          this.#matchIndex = this.#delimLPS[this.#matchIndex - 1];
        } else {
          this.#inspectIndex++;
          pos++;
        }
        continue;
      }

      this.#inspectIndex++;
      pos++;
      this.#matchIndex++;
      if (this.#matchIndex !== this.#delimLen) continue;

      // Whole delimiter seen: emit the bytes that precede it.
      const delimStart = this.#inspectIndex - this.#delimLen;
      controller.enqueue(this.#bufs.slice(0, delimStart));
      // Unlike plain KMP the match state is fully reset, so occurrences
      // never overlap. The consumed data and delimiter leave the buffer.
      this.#bufs.shift(this.#inspectIndex);
      this.#inspectIndex = 0;
      this.#matchIndex = 0;
    }
  }
}
/** Transform a stream into a stream where each chunk is divided by a given delimiter.
 *
 * The delimiter is matched by UTF-16 code units and is not included in the
 * emitted chunks. Text left over after the last delimiter (possibly the empty
 * string) is emitted when the stream closes.
 *
 * ```ts
 * import { TextDelimiterStream } from "./delimiter.ts";
 * const res = await fetch("https://example.com");
 * const parts = res.body!
 *   .pipeThrough(new TextDecoderStream())
 *   .pipeThrough(new TextDelimiterStream("foo"));
 * ```
 */
export class TextDelimiterStream extends TransformStream<string, string> {
  /** Text received but not yet emitted. */
  #buf = "";
  #delimiter: string;
  /** Number of code units of `#buf` that have already been examined. */
  #inspectIndex = 0;
  /** Number of leading delimiter code units matched so far. */
  #matchIndex = 0;
  /** KMP fallback table, indexed by delimiter code-unit position. */
  #delimLPS: Uint32Array;

  constructor(delimiter: string) {
    super({
      transform: (chunk, controller) => {
        this.#handle(chunk, controller);
      },
      flush: (controller) => {
        controller.enqueue(this.#buf);
      },
    });
    this.#delimiter = delimiter;
    // The table must use the same units as the matching loop below, which
    // indexes strings by UTF-16 code unit. Building it from UTF-8 bytes would
    // misalign it for any non-ASCII delimiter and cause missed matches.
    this.#delimLPS = createTextDelimiterLPS(delimiter);
  }

  /**
   * Append `chunk` to the buffer and scan it for the delimiter, carrying the
   * partial-match state over from previous chunks.
   */
  #handle(
    chunk: string,
    controller: TransformStreamDefaultController<string>,
  ) {
    this.#buf += chunk;
    // Cursor inside `chunk`; advances in lockstep with #inspectIndex, which
    // is the same cursor counted from the start of `#buf`.
    let localIndex = 0;
    while (this.#inspectIndex < this.#buf.length) {
      if (chunk[localIndex] === this.#delimiter[this.#matchIndex]) {
        this.#inspectIndex++;
        localIndex++;
        this.#matchIndex++;
        if (this.#matchIndex === this.#delimiter.length) {
          // Full match: emit the text that precedes the delimiter.
          const matchEnd = this.#inspectIndex - this.#delimiter.length;
          controller.enqueue(this.#buf.slice(0, matchEnd));
          // Reset match, different from KMP: occurrences never overlap.
          this.#buf = this.#buf.slice(this.#inspectIndex);
          this.#inspectIndex = 0;
          this.#matchIndex = 0;
        }
      } else if (this.#matchIndex === 0) {
        this.#inspectIndex++;
        localIndex++;
      } else {
        // Retry the same code unit against a shorter delimiter prefix.
        this.#matchIndex = this.#delimLPS[this.#matchIndex - 1];
      }
    }
  }
}

/**
 * Build the "longest proper prefix which is also a suffix" table for `pat`,
 * measured in UTF-16 code units. Entry `i` is the length of the longest proper
 * prefix of `pat.slice(0, i + 1)` that is also a suffix of it.
 */
function createTextDelimiterLPS(pat: string): Uint32Array {
  const lps = new Uint32Array(pat.length);
  let prefixEnd = 0;
  let i = 1;
  while (i < pat.length) {
    if (pat[i] === pat[prefixEnd]) {
      prefixEnd++;
      lps[i] = prefixEnd;
      i++;
    } else if (prefixEnd === 0) {
      lps[i] = 0;
      i++;
    } else {
      prefixEnd = lps[prefixEnd - 1];
    }
  }
  return lps;
}
/**
 * Generate the "longest proper prefix which is also a suffix" array used as
 * the fallback table for KMP-style matching.
 *
 * Entry `i` holds the length of the longest proper prefix of
 * `pat.subarray(0, i + 1)` that is also a suffix of it.
 *
 * NOTE: the table is a `Uint8Array`, so a prefix length above 255 does not fit
 * and is stored modulo 256. Patterns whose borders can exceed 255 bytes are
 * therefore not represented correctly.
 *
 * @param pat The pattern (delimiter) bytes.
 * @returns A table with one entry per byte of `pat`; empty for an empty pattern.
 */
function createLPS(pat: Uint8Array): Uint8Array {
  // Typed arrays start zero-filled, so lps[0] is already 0.
  const lps = new Uint8Array(pat.length);
  let prefixEnd = 0;
  let i = 1;
  while (i < lps.length) {
    if (pat[i] === pat[prefixEnd]) {
      // The current border extends by one byte.
      prefixEnd++;
      lps[i] = prefixEnd;
      i++;
    } else if (prefixEnd === 0) {
      // No border ends at position i.
      lps[i] = 0;
      i++;
    } else {
      // Fall back to the next shorter border and retry the same position.
      prefixEnd = lps[prefixEnd - 1];
    }
  }
  return lps;
}