// std/encoding/ascii85.ts

// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
// This module is browser compatible.

/**
 * {@linkcode encode} and {@linkcode decode} for
 * [Ascii85/base85](https://en.wikipedia.org/wiki/Ascii85) encoding.
 *
 * This module is browser compatible.
 *
 * ## Specifying a standard and delimiter
 *
 * By default, all functions use the most popular Adobe version of ascii85
 * and do not add any delimiter. However, three more standards are
 * supported - btoa (different delimiter and additional compression of 4 bytes
 * equal to 32), [Z85](https://rfc.zeromq.org/spec/32/) and
 * [RFC 1924](https://tools.ietf.org/html/rfc1924). It's possible to use a
 * different encoding by specifying it in the `options` object as a second
 * parameter.
 *
 * Similarly, it's possible to make `encode` add a delimiter: `<~` and `~>` for
 * Adobe, or `xbtoa Begin` and `xbtoa End` (with newlines between the delimiters
 * and the encoded data) for btoa. Checksums for btoa are not supported.
 * Delimiters are not supported by other encodings.
 *
 * @example
 * ```ts
 * import {
 *   decode,
 *   encode,
 * } from "https://deno.land/std@$STD_VERSION/encoding/ascii85.ts";
 *
 * const a85Repr = "LpTqp";
 *
 * const binaryData = decode(a85Repr);
 * console.log(binaryData);
 * // => Uint8Array [ 136, 180, 79, 24 ]
 *
 * console.log(encode(binaryData));
 * // => LpTqp
 * ```
 *
 * @module
 */

/** Names of the ascii85 variants that `encode` and `decode` accept. */
export type Ascii85Standard = "Adobe" | "btoa" | "RFC 1924" | "Z85";

/** Options shared by `encode` and `decode`. */
export interface Ascii85Options {
  /** Character set and delimiter (if supported and used).
   *
   * @default {"Adobe"}
   */
  standard?: Ascii85Standard;
  /**
   * Whether `encode` should wrap its output in a delimiter. Only the "Adobe"
   * (`<~` ... `~>`) and "btoa" (`xbtoa Begin` ... `xbtoa End`) standards emit
   * one; for the other standards the flag has no effect. `decode` does not
   * read this flag.
   */
  delimiter?: boolean;
}
// 85-character alphabet for the "RFC 1924" standard. The character at index
// `k` stands for the base-85 digit `k`, i.e. for the Adobe character with
// char code `k + 33`.
const rfc1924 =
  "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
// 85-character alphabet for the "Z85" standard, indexed the same way as
// `rfc1924` above.
const Z85 =
  "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#";
/**
 * Encodes a given Uint8Array into ascii85, supports multiple standards.
 *
 * The input is processed in big-endian 32-bit groups; each group becomes five
 * base-85 digits. When the input length is not a multiple of 4 it is padded
 * with zero bytes for the computation and the same number of trailing output
 * characters is dropped again, so the result only describes the real bytes.
 *
 * The bytes are read through the view's own `byteOffset`/`byteLength`, so a
 * `subarray()` of a larger buffer is encoded correctly.
 *
 * @param uint8 input to encode (not modified)
 * @param [options] encoding options
 * @param [options.standard=Adobe] encoding standard (Adobe, btoa, RFC 1924 or Z85)
 * @param [options.delimiter] whether to use a delimiter, if supported by encoding standard
 * @returns the ascii85 text, wrapped in the standard's delimiter when requested
 *   and the standard is "Adobe" or "btoa"
 */
export function encode(uint8: Uint8Array, options?: Ascii85Options): string {
  const standard = options?.standard ?? "Adobe";

  // Work on a zero-padded copy when the input is not a whole number of
  // 4-byte groups; `padding` remembers how many bytes were added.
  let bytes = uint8;
  let padding = 0;
  if (uint8.length % 4 !== 0) {
    padding = 4 - (uint8.length % 4);
    bytes = new Uint8Array(uint8.length + padding);
    bytes.set(uint8);
  }

  // Respect byteOffset/byteLength: `bytes` may be a view into a larger buffer.
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  const supportsZ = standard === "Adobe" || standard === "btoa";
  const realLength = bytes.length - padding;

  let output: string[] = [];
  for (let offset = 0; offset < bytes.length; offset += 4) {
    let value = view.getUint32(offset);

    // Adobe and btoa compress four zero bytes into a single "z". A padded
    // final group is never compressed, because its trailing characters have
    // to be trimmed individually below.
    if (supportsZ && value === 0 && offset + 4 <= realLength) {
      output.push("z");
      continue;
    }
    // btoa compresses four spaces (0x20202020) into a single "y".
    if (standard === "btoa" && value === 0x20202020) {
      output.push("y");
      continue;
    }

    // Produce the five base-85 digits, least significant first, and store
    // them most significant first.
    const group = new Array<string>(5);
    for (let j = 4; j >= 0; j--) {
      group[j] = String.fromCharCode((value % 85) + 33);
      value = Math.trunc(value / 85);
    }
    output.push(...group);
  }

  // RFC 1924 and Z85 use the same digits with a different alphabet.
  if (standard === "RFC 1924") {
    output = output.map((ch) => rfc1924.charAt(ch.charCodeAt(0) - 33));
  } else if (standard === "Z85") {
    output = output.map((ch) => Z85.charAt(ch.charCodeAt(0) - 33));
  }

  // Drop the characters that only exist because of the zero padding.
  const body = output.slice(0, output.length - padding).join("");

  if (options?.delimiter) {
    if (standard === "Adobe") {
      return `<~${body}~>`;
    }
    if (standard === "btoa") {
      return `xbtoa Begin\n${body}\nxbtoa End`;
    }
  }
  return body;
}
/**
 * Decodes a given ascii85 encoded string.
 *
 * The input is first normalised to the plain Adobe character range: known
 * delimiters are stripped, the "z" (and for btoa also "y") shortcuts are
 * expanded, and RFC 1924 / Z85 characters are translated through their
 * alphabets. Every character left outside `!`..`u` is then discarded before
 * the groups of five characters are turned back into big-endian 32-bit words.
 *
 * @param ascii85 input to decode
 * @param [options] decoding options
 * @param [options.standard=Adobe] encoding standard used in the input string (Adobe, btoa, RFC 1924 or Z85)
 * @returns the decoded bytes
 */
export function decode(ascii85: string, options?: Ascii85Options): Uint8Array {
  const standard = options?.standard ?? "Adobe";

  // Step 1: bring every supported standard down to bare Adobe characters.
  let text = ascii85;
  if (standard === "Adobe") {
    text = text.replaceAll(/(<~|~>)/g, "");
    text = text.replaceAll("z", "!!!!!");
  } else if (standard === "btoa") {
    text = text.replaceAll(/(xbtoa Begin|xbtoa End|\n)/g, "");
    text = text.replaceAll("z", "!!!!!");
    text = text.replaceAll("y", "+<VdL");
  } else if (standard === "RFC 1924") {
    text = text.replaceAll(
      /./g,
      (ch) => String.fromCharCode(rfc1924.indexOf(ch) + 33),
    );
  } else if (standard === "Z85") {
    text = text.replaceAll(
      /./g,
      (ch) => String.fromCharCode(Z85.indexOf(ch) + 33),
    );
  }

  // Step 2: anything that is still not a base-85 digit is dropped.
  text = text.replaceAll(/[^!-u]/g, "");

  // Step 3: fold each run of five digits into one 32-bit word.
  const charCount = text.length;
  const bytes = new Uint8Array(charCount + 4 - (charCount % 4));
  const words = new DataView(bytes.buffer);
  let writeAt = 0;
  for (let groupStart = 0; groupStart < charCount; groupStart += 5) {
    let word = 0;
    for (let pos = groupStart; pos < groupStart + 5; pos++) {
      // A short final group is completed with "u" (char code 117), the
      // highest digit.
      const code = pos < charCount ? text.charCodeAt(pos) : 117;
      word = word * 85 + code - 33;
    }
    words.setUint32(writeAt, word);
    writeAt += 4;
  }

  // Five characters carry four bytes; cut the buffer down to that size.
  return bytes.slice(0, Math.trunc(charCount * 0.8));
}
chore: update copyright header (#3082) 2023-01-03 10:47:44 +00:00			`// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.`
chore(tools): improved web-compatibility checks (#3263) 2023-03-18 12:36:00 +00:00			`// This module is browser compatible.`
docs: improve module documentation (#2511) 2022-08-11 11:51:20 +00:00
			`/**`
			`* {@linkcode encode} and {@linkcode decode} for`
			`* [Ascii85/base85](https://en.wikipedia.org/wiki/Ascii85) encoding.`
			`*`
			`* This module is browser compatible.`
			`*`
docs: reorganize docs (#2658) 2022-11-25 11:40:23 +00:00			`* ## Specifying a standard and delimiter`
			`*`
			`* By default, all functions are using the most popular Adobe version of ascii85`
			`* and not adding any delimiter. However, there are three more standards`
			`* supported - btoa (different delimiter and additional compression of 4 bytes`
			`* equal to 32), [Z85](https://rfc.zeromq.org/spec/32/) and`
			`* [RFC 1924](https://tools.ietf.org/html/rfc1924). It's possible to use a`
			* different encoding by specifying it in `options` object as a second parameter.
			`*`
			* Similarly, it's possible to make `encode` add a delimiter (`<~` and `~>` for
			* Adobe, `xbtoa Begin` and `xbtoa End` with newlines between the delimiters and
			`* encoded data for btoa. Checksums for btoa are not supported. Delimiters are not`
			`* supported by other encodings.)`
			`*`
			`* @example`
			* ```ts
			`* import {`
			`* decode,`
			`* encode,`
			`* } from "https://deno.land/std@$STD_VERSION/encoding/ascii85.ts";`
			`*`
			`* const a85Repr = "LpTqp";`
			`*`
			`* const binaryData = decode(a85Repr);`
			`* console.log(binaryData);`
			`* // => Uint8Array [ 136, 180, 79, 24 ]`
			`*`
			`* console.log(encode(binaryData));`
			`* // => LpTqp`
			* ```
			`*`
docs: improve module documentation (#2511) 2022-08-11 11:51:20 +00:00			`* @module`
			`*/`
feat(encoding): add ascii85 module (denoland/deno#6711) 2020-07-14 18:26:49 +00:00
			`export type Ascii85Standard = "Adobe" \| "btoa" \| "RFC 1924" \| "Z85";`
docs: improve module documentation (#2511) 2022-08-11 11:51:20 +00:00
docs: reorganize docs (#2658) 2022-11-25 11:40:23 +00:00			`/** encoding/decoding options */`
feat(encoding): add ascii85 module (denoland/deno#6711) 2020-07-14 18:26:49 +00:00			`export interface Ascii85Options {`
docs: reorganize docs (#2658) 2022-11-25 11:40:23 +00:00			`/** characterset and delimiter (if supported and used).`
			`*`
			`* @default {"Adobe"}`
			`*/`
feat(encoding): add ascii85 module (denoland/deno#6711) 2020-07-14 18:26:49 +00:00			`standard?: Ascii85Standard;`
docs: reorganize docs (#2658) 2022-11-25 11:40:23 +00:00			`/** whether to use a delimiter (if supported) - "<~" and "~>" by default */`
feat(encoding): add ascii85 module (denoland/deno#6711) 2020-07-14 18:26:49 +00:00			`delimiter?: boolean;`
			`}`
			`const rfc1924 =`
			"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{\|}~";
			`const Z85 =`
			`"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#";`
			`/**`
			`* Encodes a given Uint8Array into ascii85, supports multiple standards`
			`* @param uint8 input to encode`
			`* @param [options] encoding options`
			`* @param [options.standard=Adobe] encoding standard (Adobe, btoa, RFC 1924 or Z85)`
chore: fixed various misspellings and other typos (denoland/deno#8691) 2020-12-10 19:45:45 +00:00			`* @param [options.delimiter] whether to use a delimiter, if supported by encoding standard`
feat(encoding): add ascii85 module (denoland/deno#6711) 2020-07-14 18:26:49 +00:00			`*/`
			`export function encode(uint8: Uint8Array, options?: Ascii85Options): string {`
			`const standard = options?.standard ?? "Adobe";`
			`let output: string[] = [],`
			`v: number,`
			`n = 0,`
			`difference = 0;`
			`if (uint8.length % 4 !== 0) {`
			`const tmp = uint8;`
			`difference = 4 - (tmp.length % 4);`
			`uint8 = new Uint8Array(tmp.length + difference);`
			`uint8.set(tmp);`
			`}`
			`const view = new DataView(uint8.buffer);`
			`for (let i = 0, len = uint8.length; i < len; i += 4) {`
			`v = view.getUint32(i);`
			`// Adobe and btoa standards compress 4 zeroes to single "z" character`
			`if (`
			`(standard === "Adobe" \|\| standard === "btoa") &&`
			`v === 0 &&`
			`i < len - difference - 3`
			`) {`
			`output[n++] = "z";`
			`continue;`
			`}`
			`// btoa compresses 4 spaces - that is, bytes equal to 32 - into single "y" character`
			`if (standard === "btoa" && v === 538976288) {`
			`output[n++] = "y";`
			`continue;`
			`}`
			`for (let j = 4; j >= 0; j--) {`
			`output[n + j] = String.fromCharCode((v % 85) + 33);`
			`v = Math.trunc(v / 85);`
			`}`
			`n += 5;`
			`}`
			`switch (standard) {`
			`case "Adobe":`
			`if (options?.delimiter) {`
			return `<~${output.slice(0, output.length - difference).join("")}~>`;
			`}`
			`break;`
			`case "btoa":`
			`if (options?.delimiter) {`
Use dprint for internal formatting (denoland/deno#6682) 2020-07-14 19:24:17 +00:00			return `xbtoa Begin\n${
			`output`
			`.slice(0, output.length - difference)`
			`.join("")`
			}\nxbtoa End`;
feat(encoding): add ascii85 module (denoland/deno#6711) 2020-07-14 18:26:49 +00:00			`}`
			`break;`
			`case "RFC 1924":`
			`output = output.map((val) => rfc1924[val.charCodeAt(0) - 33]);`
			`break;`
			`case "Z85":`
			`output = output.map((val) => Z85[val.charCodeAt(0) - 33]);`
			`break;`
			`}`
			`return output.slice(0, output.length - difference).join("");`
			`}`
			`/**`
			`* Decodes a given ascii85 encoded string.`
			`* @param ascii85 input to decode`
			`* @param [options] decoding options`
			`* @param [options.standard=Adobe] encoding standard used in the input string (Adobe, btoa, RFC 1924 or Z85)`
			`*/`
			`export function decode(ascii85: string, options?: Ascii85Options): Uint8Array {`
			`const encoding = options?.standard ?? "Adobe";`
			`// translate all encodings to most basic adobe/btoa one and decompress some special characters ("z" and "y")`
			`switch (encoding) {`
			`case "Adobe":`
			`ascii85 = ascii85.replaceAll(/(<~\|~>)/g, "").replaceAll("z", "!!!!!");`
			`break;`
			`case "btoa":`
			`ascii85 = ascii85`
			`.replaceAll(/(xbtoa Begin\|xbtoa End\|\n)/g, "")`
			`.replaceAll("z", "!!!!!")`
			`.replaceAll("y", "+<VdL");`
			`break;`
			`case "RFC 1924":`
Use dprint for internal formatting (denoland/deno#6682) 2020-07-14 19:24:17 +00:00			`ascii85 = ascii85.replaceAll(`
			`/./g,`
			`(match) => String.fromCharCode(rfc1924.indexOf(match) + 33),`
feat(encoding): add ascii85 module (denoland/deno#6711) 2020-07-14 18:26:49 +00:00			`);`
			`break;`
			`case "Z85":`
Use dprint for internal formatting (denoland/deno#6682) 2020-07-14 19:24:17 +00:00			`ascii85 = ascii85.replaceAll(`
			`/./g,`
			`(match) => String.fromCharCode(Z85.indexOf(match) + 33),`
feat(encoding): add ascii85 module (denoland/deno#6711) 2020-07-14 18:26:49 +00:00			`);`
			`break;`
			`}`
			`//remove all invalid characters`
			`ascii85 = ascii85.replaceAll(/[^!-u]/g, "");`
			`const len = ascii85.length,`
			`output = new Uint8Array(len + 4 - (len % 4));`
			`const view = new DataView(output.buffer);`
			`let v = 0,`
			`n = 0,`
			`max = 0;`
Use dprint for internal formatting (denoland/deno#6682) 2020-07-14 19:24:17 +00:00			`for (let i = 0; i < len;) {`
feat(encoding): add ascii85 module (denoland/deno#6711) 2020-07-14 18:26:49 +00:00			`for (max += 5; i < max; i++) {`
			`v = v * 85 + (i < len ? ascii85.charCodeAt(i) : 117) - 33;`
			`}`
			`view.setUint32(n, v);`
			`v = 0;`
			`n += 4;`
			`}`
			`return output.slice(0, Math.trunc(len * 0.8));`
			`}`