BREAKING(bytes): deprecate BytesList class (#3589)

This commit is contained in:
Aapo Alasuutari 2023-08-31 13:40:32 +03:00 committed by GitHub
parent 7fc5cdacab
commit 6d75c4ed7f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 214 additions and 91 deletions

View File

@ -3,6 +3,8 @@
/**
* An abstraction of multiple Uint8Arrays
*
* @deprecated (will be removed in 0.205.0) Use a plain array of Uint8Arrays instead.
*/
export class BytesList {
#len = 0;
@ -16,12 +18,20 @@ export class BytesList {
/**
 * Total size of bytes
 *
 * @deprecated (will be removed in 0.205.0) Use a plain array of Uint8Arrays
 * instead. The total size can be computed by summing each chunk's
 * {@linkcode Uint8Array#byteLength}.
 */
size() {
  return this.#len;
}
/**
* Push bytes with given offset infos
*
* @deprecated Use a plain array of Uint8Arrays instead.
* Adding into the array can be done with {@linkcode Array#push}.
* If {@linkcode start} or {@linkcode end} parameters are
* used then use {@linkcode Uint8Array#subarray}
* to slice the needed part without copying.
*/
add(value: Uint8Array, start = 0, end = value.byteLength) {
if (value.byteLength === 0 || end - start === 0) {
@ -39,6 +49,16 @@ export class BytesList {
/**
* Drop head `n` bytes.
*
* @deprecated Use a plain array of Uint8Arrays instead.
* Shifting from the array can be done using conditional
* {@linkcode Array#shift}s against the number of bytes left
* to be dropped.
*
* If the next item in the array is longer than the number
* of bytes left to be dropped, then instead of shifting it out
* it should be replaced in-place with a subarray of itself that
* drops the remaining bytes from the front.
*/
shift(n: number) {
if (n === 0) {
@ -67,6 +87,12 @@ export class BytesList {
/**
* Find chunk index in which `pos` locates by binary-search
* returns -1 if out of range
*
* @deprecated Use a plain array of Uint8Arrays instead.
* Finding the index of a chunk in the array can be
* done using {@linkcode Array#findIndex} with a counter
* for the number of bytes already encountered from past
* chunks' {@linkcode Uint8Array#byteLength}.
*/
getChunkIndex(pos: number): number {
let max = this.#chunks.length;
@ -90,6 +116,10 @@ export class BytesList {
/**
* Get indexed byte from chunks
*
* @deprecated Use a plain array of Uint8Arrays instead.
* See {@linkcode getChunkIndex} for finding a chunk
* by number of bytes.
*/
get(i: number): number {
if (i < 0 || this.#len <= i) {
@ -102,6 +132,8 @@ export class BytesList {
/**
* Iterator of bytes from given position
*
* @deprecated Use a plain array of Uint8Arrays instead.
*/
*iterator(start = 0): IterableIterator<number> {
const startIdx = this.getChunkIndex(start);
@ -119,6 +151,13 @@ export class BytesList {
/**
* Returns subset of bytes copied
*
* @deprecated Use a plain array of Uint8Arrays instead.
* For copying the whole list see {@linkcode concat}.
* For copying subarrays find the start and end chunk indexes
* and the internal indexes within those Uint8Arrays, prepare
* a Uint8Array of size `end - start` and set the chunks (or
* chunk subarrays) into that at proper offsets.
*/
slice(start: number, end: number = this.#len): Uint8Array {
if (end === start) {
@ -146,6 +185,8 @@ export class BytesList {
}
/**
* Concatenate chunks into single Uint8Array copied.
*
* @deprecated Use a plain array of Uint8Arrays and the `concat.ts` module instead.
*/
concat(): Uint8Array {
const result = new Uint8Array(this.#len);

View File

@ -1,6 +1,6 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
import { BytesList } from "../bytes/bytes_list.ts";
import { concat } from "../bytes/concat.ts";
export type ValueType =
| number
@ -48,9 +48,9 @@ const encoder = new TextEncoder();
* ```
*/
export function encode(object: ValueType) {
  // Collect the encoded parts as plain Uint8Array chunks (the deprecated
  // BytesList abstraction is no longer used), then join them once at the end.
  const byteParts: Uint8Array[] = [];
  encodeSlice(object, byteParts);
  return concat(...byteParts);
}
function encodeFloat64(num: number) {
@ -119,24 +119,24 @@ function encodeNumber(num: number) {
return encodeFloat64(num);
}
function encodeSlice(object: ValueType, byteList: BytesList) {
function encodeSlice(object: ValueType, byteParts: Uint8Array[]) {
if (object === null) {
byteList.add(new Uint8Array([0xc0]));
byteParts.push(new Uint8Array([0xc0]));
return;
}
if (object === false) {
byteList.add(new Uint8Array([0xc2]));
byteParts.push(new Uint8Array([0xc2]));
return;
}
if (object === true) {
byteList.add(new Uint8Array([0xc3]));
byteParts.push(new Uint8Array([0xc3]));
return;
}
if (typeof object === "number") {
byteList.add(encodeNumber(object));
byteParts.push(encodeNumber(object));
return;
}
@ -149,7 +149,7 @@ function encodeSlice(object: ValueType, byteList: BytesList) {
const dataView = new DataView(new ArrayBuffer(9));
dataView.setBigInt64(1, object);
dataView.setUint8(0, 0xd3);
byteList.add(new Uint8Array(dataView.buffer));
byteParts.push(new Uint8Array(dataView.buffer));
return;
}
@ -160,7 +160,7 @@ function encodeSlice(object: ValueType, byteList: BytesList) {
const dataView = new DataView(new ArrayBuffer(9));
dataView.setBigUint64(1, object);
dataView.setUint8(0, 0xcf);
byteList.add(new Uint8Array(dataView.buffer));
byteParts.push(new Uint8Array(dataView.buffer));
return;
}
@ -169,63 +169,63 @@ function encodeSlice(object: ValueType, byteList: BytesList) {
const len = encoded.length;
if (len < FIVE_BITS) { // fixstr
byteList.add(new Uint8Array([0xa0 | len]));
byteParts.push(new Uint8Array([0xa0 | len]));
} else if (len < EIGHT_BITS) { // str 8
byteList.add(new Uint8Array([0xd9, len]));
byteParts.push(new Uint8Array([0xd9, len]));
} else if (len < SIXTEEN_BITS) { // str 16
const dataView = new DataView(new ArrayBuffer(3));
dataView.setUint16(1, len);
dataView.setUint8(0, 0xda);
byteList.add(new Uint8Array(dataView.buffer));
byteParts.push(new Uint8Array(dataView.buffer));
} else if (len < THIRTY_TWO_BITS) { // str 32
const dataView = new DataView(new ArrayBuffer(5));
dataView.setUint32(1, len);
dataView.setUint8(0, 0xdb);
byteList.add(new Uint8Array(dataView.buffer));
byteParts.push(new Uint8Array(dataView.buffer));
} else {
throw new Error(
"Cannot safely encode string with size larger than 32 bits",
);
}
byteList.add(encoded);
byteParts.push(encoded);
return;
}
if (object instanceof Uint8Array) {
if (object.length < EIGHT_BITS) { // bin 8
byteList.add(new Uint8Array([0xc4, object.length]));
byteParts.push(new Uint8Array([0xc4, object.length]));
} else if (object.length < SIXTEEN_BITS) { // bin 16
const dataView = new DataView(new ArrayBuffer(3));
dataView.setUint16(1, object.length);
dataView.setUint8(0, 0xc5);
byteList.add(new Uint8Array(dataView.buffer));
byteParts.push(new Uint8Array(dataView.buffer));
} else if (object.length < THIRTY_TWO_BITS) { // bin 32
const dataView = new DataView(new ArrayBuffer(5));
dataView.setUint32(1, object.length);
dataView.setUint8(0, 0xc6);
byteList.add(new Uint8Array(dataView.buffer));
byteParts.push(new Uint8Array(dataView.buffer));
} else {
throw new Error(
"Cannot safely encode Uint8Array with size larger than 32 bits",
);
}
byteList.add(object);
byteParts.push(object);
return;
}
if (Array.isArray(object)) {
if (object.length < FOUR_BITS) { // fixarray
byteList.add(new Uint8Array([0x90 | object.length]));
byteParts.push(new Uint8Array([0x90 | object.length]));
} else if (object.length < SIXTEEN_BITS) { // array 16
const dataView = new DataView(new ArrayBuffer(3));
dataView.setUint16(1, object.length);
dataView.setUint8(0, 0xdc);
byteList.add(new Uint8Array(dataView.buffer));
byteParts.push(new Uint8Array(dataView.buffer));
} else if (object.length < THIRTY_TWO_BITS) { // array 32
const dataView = new DataView(new ArrayBuffer(5));
dataView.setUint32(1, object.length);
dataView.setUint8(0, 0xdd);
byteList.add(new Uint8Array(dataView.buffer));
byteParts.push(new Uint8Array(dataView.buffer));
} else {
throw new Error(
"Cannot safely encode array with size larger than 32 bits",
@ -233,7 +233,7 @@ function encodeSlice(object: ValueType, byteList: BytesList) {
}
for (const obj of object) {
encodeSlice(obj, byteList);
encodeSlice(obj, byteParts);
}
return;
}
@ -243,24 +243,24 @@ function encodeSlice(object: ValueType, byteList: BytesList) {
const numKeys = Object.keys(object).length;
if (numKeys < FOUR_BITS) { // fixarray
byteList.add(new Uint8Array([0x80 | numKeys]));
byteParts.push(new Uint8Array([0x80 | numKeys]));
} else if (numKeys < SIXTEEN_BITS) { // map 16
const dataView = new DataView(new ArrayBuffer(3));
dataView.setUint16(1, numKeys);
dataView.setUint8(0, 0xde);
byteList.add(new Uint8Array(dataView.buffer));
byteParts.push(new Uint8Array(dataView.buffer));
} else if (numKeys < THIRTY_TWO_BITS) { // map 32
const dataView = new DataView(new ArrayBuffer(5));
dataView.setUint32(1, numKeys);
dataView.setUint8(0, 0xdf);
byteList.add(new Uint8Array(dataView.buffer));
byteParts.push(new Uint8Array(dataView.buffer));
} else {
throw new Error("Cannot safely encode map with size larger than 32 bits");
}
for (const [key, value] of Object.entries(object)) {
encodeSlice(key, byteList);
encodeSlice(value, byteList);
encodeSlice(key, byteParts);
encodeSlice(value, byteParts);
}
return;
}

View File

@ -6,11 +6,12 @@ export const DEFAULT_BUFFER_SIZE = 32 * 1024;
/** Generate longest proper prefix which is also suffix array. */
export function createLPS(pat: Uint8Array): Uint8Array {
const lps = new Uint8Array(pat.length);
const length = pat.length;
const lps = new Uint8Array(length);
lps[0] = 0;
let prefixEnd = 0;
let i = 1;
while (i < lps.length) {
while (i < length) {
if (pat[i] === pat[prefixEnd]) {
prefixEnd++;
lps[i] = prefixEnd;

View File

@ -1,7 +1,7 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
// This module is browser compatible.
import { BytesList } from "../bytes/bytes_list.ts";
import { concat } from "../bytes/concat.ts";
import { createLPS } from "./_common.ts";
/** Disposition of the delimiter. */
@ -52,13 +52,11 @@ export interface DelimiterStreamOptions {
* @returns Transform stream
*/
export class DelimiterStream extends TransformStream<Uint8Array, Uint8Array> {
#bufs = new BytesList();
#bufs: Uint8Array[] = [];
#delimiter: Uint8Array;
#inspectIndex = 0;
#matchIndex = 0;
#delimLen: number;
#delimLPS: Uint8Array;
#disp?: DelimiterDisposition;
#disp: DelimiterDisposition;
constructor(
delimiter: Uint8Array,
@ -69,12 +67,11 @@ export class DelimiterStream extends TransformStream<Uint8Array, Uint8Array> {
this.#handle(chunk, controller);
},
flush: (controller) => {
controller.enqueue(this.#bufs.concat());
controller.enqueue(concat(...this.#bufs));
},
});
this.#delimiter = delimiter;
this.#delimLen = delimiter.length;
this.#delimLPS = createLPS(delimiter);
this.#disp = options?.disposition ?? "discard";
}
@ -83,32 +80,109 @@ export class DelimiterStream extends TransformStream<Uint8Array, Uint8Array> {
chunk: Uint8Array,
controller: TransformStreamDefaultController<Uint8Array>,
) {
this.#bufs.add(chunk);
let localIndex = 0;
while (this.#inspectIndex < this.#bufs.size()) {
if (chunk[localIndex] === this.#delimiter[this.#matchIndex]) {
this.#inspectIndex++;
localIndex++;
this.#matchIndex++;
if (this.#matchIndex === this.#delimLen) {
const bufs = this.#bufs;
const length = chunk.byteLength;
const disposition = this.#disp;
const delimiter = this.#delimiter;
const delimLen = delimiter.length;
const lps = this.#delimLPS;
let chunkStart = 0;
let matchIndex = this.#matchIndex;
let inspectIndex = 0;
while (inspectIndex < length) {
if (chunk[inspectIndex] === delimiter[matchIndex]) {
// Next byte matched our next delimiter byte
inspectIndex++;
matchIndex++;
if (matchIndex === delimLen) {
// Full match
const start = this.#inspectIndex - this.#delimLen;
const end = this.#disp === "suffix" ? this.#inspectIndex : start;
const copy = this.#bufs.slice(0, end);
controller.enqueue(copy);
const shift = this.#disp === "prefix" ? start : this.#inspectIndex;
this.#bufs.shift(shift);
this.#inspectIndex = this.#disp === "prefix" ? this.#delimLen : 0;
this.#matchIndex = 0;
matchIndex = 0;
const delimiterStartIndex = inspectIndex - delimLen;
const delimitedChunkEnd = disposition === "suffix"
? inspectIndex
: delimiterStartIndex;
if (delimitedChunkEnd <= 0 && bufs.length === 0) {
// Our chunk started with a delimiter and no previous chunks exist:
// Enqueue an empty chunk.
controller.enqueue(new Uint8Array());
} else if (delimitedChunkEnd > 0 && bufs.length === 0) {
// No previous chunks, slice from current chunk.
controller.enqueue(chunk.subarray(chunkStart, delimitedChunkEnd));
// Our chunk may have more than one delimiter; we must remember where
// the next delimited chunk begins.
chunkStart = disposition === "prefix"
? delimiterStartIndex
: inspectIndex;
} else if (delimitedChunkEnd === 0 && bufs.length > 0) {
// Our chunk started with a delimiter, previous chunks are passed as
// they are (with concatenation).
if (bufs.length === 1) {
// Concat not needed when a single buffer is passed.
controller.enqueue(bufs[0]);
} else {
controller.enqueue(concat(...bufs));
}
// Drop all previous chunks.
bufs.length = 0;
if (disposition !== "prefix") {
// suffix or discard: The next chunk starts where our inspection finished.
// We should only ever end up here with a discard disposition as
// for a suffix disposition this branch would mean that the previous
// chunk ended with a full match but was not enqueued.
chunkStart = inspectIndex;
}
} else if (delimitedChunkEnd < 0 && bufs.length > 0) {
// Our chunk started by finishing a partial delimiter match.
const lastIndex = bufs.length - 1;
const last = bufs[lastIndex];
const lastSliceIndex = last.byteLength + delimitedChunkEnd;
const lastSliced = last.subarray(0, lastSliceIndex);
if (lastIndex === 0) {
controller.enqueue(lastSliced);
} else {
bufs[lastIndex] = lastSliced;
controller.enqueue(concat(...bufs));
}
bufs.length = 0;
if (disposition === "prefix") {
// Must keep last bytes of last chunk.
bufs.push(last.subarray(lastSliceIndex));
} else {
chunkStart = inspectIndex;
}
} else if (delimitedChunkEnd > 0 && bufs.length > 0) {
// Previous chunks and current chunk together form a delimited chunk.
const chunkSliced = chunk.subarray(chunkStart, delimitedChunkEnd);
const result = concat(...bufs, chunkSliced);
bufs.length = 0;
chunkStart = disposition === "prefix"
? delimitedChunkEnd
: inspectIndex;
controller.enqueue(result);
} else {
throw new Error("unreachable");
}
}
} else if (matchIndex === 0) {
// No match ongoing, keep going through the buffer.
inspectIndex++;
} else {
if (this.#matchIndex === 0) {
this.#inspectIndex++;
localIndex++;
} else {
this.#matchIndex = this.#delimLPS[this.#matchIndex - 1];
}
// Ongoing match: Degrade to the previous possible match.
// eg. If we're looking for 'AAB' and had matched 'AA' previously
// but now got a new 'A', then we'll drop down to having matched
// just 'A'. The while loop will turn around again and we'll rematch
// to 'AA' and proceed onwards to try and match on 'B' again.
matchIndex = lps[matchIndex - 1];
}
}
// Save match index.
this.#matchIndex = matchIndex;
if (chunkStart === 0) {
bufs.push(chunk);
} else if (chunkStart !== length - 1) {
// If we matched partially somewhere in the middle of our chunk
// then the remnants should be pushed into buffers.
bufs.push(chunk.subarray(chunkStart));
}
}
}

View File

@ -3,71 +3,78 @@
import { DelimiterStream } from "./delimiter_stream.ts";
import { testTransformStream } from "./_test_common.ts";
// Shared input chunks for the DelimiterStream tests below. Each string is fed
// to the stream as one chunk; "CRLF" is the literal delimiter being matched.
const DELIMITER_STREAM_INPUTS = (() => {
  const encoder = new TextEncoder();
  const chunks = [
    "a", // more than one subsequent chunks with no delimiters
    "b", // more than one subsequent chunks with no delimiters
    "cCRLF", // more than one subsequent chunks with no delimiters
    "CRLF", // chunk with only delimiter
    "qwertzu", // no delimiter
    "iopasdCRLFmnbvc", // one delimiter in the middle
    "xylkjhCRLFgfdsapCRLFoiuzt", // two separate delimiters
    "euoiCRLFCRLFaueiou", // two consecutive delimiters
    "rewq098765432CR", // split delimiter (1/2)
    "LF349012i491290", // split delimiter (2/2)
    "asdfghjkliopCR", // split delimiter with followup (1/2)
    "LFytrewqCRLFmnbvcxz", // split delimiter with followup (2/2)
    "CRLFasd", // chunk starts with delimiter
  ];
  return chunks.map((chunk) => encoder.encode(chunk));
})();
Deno.test("[streams] DelimiterStream, discard", async () => {
  const crlf = new TextEncoder().encode("CRLF");
  const delimStream = new DelimiterStream(crlf, { disposition: "discard" });
  // Expected segments of the concatenated DELIMITER_STREAM_INPUTS, split on
  // "CRLF" with the delimiter itself dropped; the final "asd" comes from flush.
  const outputs = [
    "abc",
    "",
    "qwertzuiopasd",
    "mnbvcxylkjh",
    "gfdsap",
    "oiuzteuoi",
    "",
    "aueiourewq098765432",
    "349012i491290asdfghjkliop",
    "ytrewq",
    "mnbvcxz",
    "asd",
  ].map((s) => new TextEncoder().encode(s));
  await testTransformStream(delimStream, DELIMITER_STREAM_INPUTS, outputs);
});
Deno.test("[streams] DelimiterStream, suffix", async () => {
  const crlf = new TextEncoder().encode("CRLF");
  const delimStream = new DelimiterStream(crlf, { disposition: "suffix" });
  // Same segmentation as the discard case, but each delimited chunk keeps its
  // trailing "CRLF"; the final flush chunk ("asd") has no delimiter.
  const outputs = [
    "abcCRLF",
    "CRLF",
    "qwertzuiopasdCRLF",
    "mnbvcxylkjhCRLF",
    "gfdsapCRLF",
    "oiuzteuoiCRLF",
    "CRLF",
    "aueiourewq098765432CRLF",
    "349012i491290asdfghjkliopCRLF",
    "ytrewqCRLF",
    "mnbvcxzCRLF",
    "asd",
  ].map((s) => new TextEncoder().encode(s));
  await testTransformStream(delimStream, DELIMITER_STREAM_INPUTS, outputs);
});
Deno.test("[streams] DelimiterStream, prefix", async () => {
  const crlf = new TextEncoder().encode("CRLF");
  const delimStream = new DelimiterStream(crlf, { disposition: "prefix" });
  // Same segmentation, but each delimiter is attached to the FOLLOWING chunk;
  // the very first chunk ("abc") therefore has no leading "CRLF".
  const outputs = [
    "abc",
    "CRLF",
    "CRLFqwertzuiopasd",
    "CRLFmnbvcxylkjh",
    "CRLFgfdsap",
    "CRLFoiuzteuoi",
    "CRLF",
    "CRLFaueiourewq098765432",
    "CRLF349012i491290asdfghjkliop",
    "CRLFytrewq",
    "CRLFmnbvcxz",
    "CRLFasd",
  ].map((s) => new TextEncoder().encode(s));
  await testTransformStream(delimStream, DELIMITER_STREAM_INPUTS, outputs);
});