std/io/read_delim.ts
Yoshiya Hinosawa d93b33aff8
docs(io): document std/io (#5656)
Co-authored-by: Asher Gomez <ashersaupingomez@gmail.com>
2024-08-08 23:20:43 +09:00

101 lines
2.8 KiB
TypeScript

// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
// This module is browser compatible.
import { concat } from "@std/bytes/concat";
import type { Reader } from "./types.ts";
/**
 * Build the KMP failure table ("longest proper prefix which is also suffix")
 * for `pat`: entry `i` holds the length of the longest proper prefix of
 * `pat[0..i]` that is also a suffix of it.
 *
 * The table is stored in a `Uint32Array` rather than a `Uint8Array` because
 * an entry can be as large as `pat.length - 1`; 8-bit storage silently wraps
 * modulo 256 for patterns longer than 256 bytes and corrupts the table.
 *
 * @param pat The pattern bytes to analyse.
 * @returns A table with one entry per pattern byte (empty for an empty pattern).
 */
function createLPS(pat: Uint8Array): Uint32Array {
  // Typed arrays are zero-filled, so lps[0] is already 0.
  const lps = new Uint32Array(pat.length);
  // Length of the prefix currently matched against the suffix ending at i - 1.
  let prefixEnd = 0;
  let i = 1;
  while (i < lps.length) {
    if (pat[i] === pat[prefixEnd]) {
      // The matched prefix grows by one byte.
      prefixEnd++;
      lps[i] = prefixEnd;
      i++;
    } else if (prefixEnd === 0) {
      // No prefix left to fall back to: nothing matches at this position.
      lps[i] = 0;
      i++;
    } else {
      // Fall back to the next shorter candidate prefix and retry the same i.
      prefixEnd = lps[prefixEnd - 1]!;
    }
  }
  return lps;
}
/**
 * Split the bytes produced by a {@linkcode Reader} on a delimiter, exposing
 * the pieces as an {@linkcode AsyncIterableIterator} of {@linkcode Uint8Array}.
 *
 * Each yielded chunk holds the bytes found before one occurrence of `delim`;
 * the delimiter itself is not part of the chunk. When the reader reports end
 * of input (`null`), whatever bytes are still buffered are yielded as the
 * final chunk, which may be empty. A negative read result ends the iteration
 * without yielding the buffered remainder.
 *
 * @example Usage
 * ```ts
 * import { readDelim } from "@std/io/read-delim";
 * import { assert } from "@std/assert/assert";
 *
 * const fileReader = await Deno.open("README.md");
 * for await (const chunk of readDelim(fileReader, new TextEncoder().encode("\n"))) {
 *   assert(chunk instanceof Uint8Array);
 * }
 * ```
 *
 * @param reader The reader to read from
 * @param delim The delimiter to read until
 * @returns The {@linkcode AsyncIterableIterator} of {@linkcode Uint8Array}s.
 *
 * @deprecated This will be removed in 1.0.0. Use the {@link https://developer.mozilla.org/en-US/docs/Web/API/Streams_API | Web Streams API} instead.
 */
export async function* readDelim(
  reader: Reader,
  delim: Uint8Array,
): AsyncIterableIterator<Uint8Array> {
  // Everything below works on raw bytes, never on decoded characters.
  const needleSize = delim.length;
  const fallback = createLPS(delim);
  const readSize = Math.max(1024, needleSize + 1);

  // Bytes received so far that have not been yielded yet.
  let pending = new Uint8Array();
  // KMP state, kept across reads so a delimiter may straddle two reads.
  let scanPos = 0; // next unscanned offset within `pending`
  let matched = 0; // number of delimiter bytes currently matched

  for (;;) {
    const buf = new Uint8Array(readSize);
    const count = await reader.read(buf);
    if (count === null) {
      // End of input: hand out whatever is left.
      yield pending;
      return;
    }
    if (count < 0) {
      // Drop the buffered remainder and stop quietly.
      return;
    }
    pending = concat([pending, buf.slice(0, count)]);

    // `bufPos` walks `buf` in lockstep with `scanPos` walking `pending`.
    let bufPos = 0;
    while (scanPos < pending.length) {
      const isHit = buf[bufPos] === delim[matched];
      if (!isHit && matched !== 0) {
        // Partial match broke: retry this byte against a shorter prefix.
        matched = fallback[matched - 1]!;
        continue;
      }

      scanPos += 1;
      bufPos += 1;
      if (!isHit) {
        continue;
      }

      matched += 1;
      if (matched !== needleSize) {
        continue;
      }

      // Whole delimiter seen: emit the bytes that precede it.
      yield pending.slice(0, scanPos - needleSize);
      // Unlike plain KMP, start over from scratch after the delimiter.
      pending = pending.slice(scanPos);
      scanPos = 0;
      matched = 0;
    }
  }
}