docs(archive): further documentation (#4462)

docs(archive): complete documentation
This commit is contained in:
Asher Gomez 2024-03-11 20:52:36 +11:00 committed by GitHub
parent d7cfbcb466
commit d63e97c735
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 144 additions and 38 deletions

View File

@ -3,39 +3,55 @@
import { PartialReadError } from "../io/buf_reader.ts";
import type { Reader } from "../io/types.ts";
/**
* Base interface for {@linkcode TarMeta} and {@linkcode TarOptions}. It holds
* the optional file metadata that both of them share.
*/
export interface TarInfo {
/**
* The underlying raw `st_mode` bits that contain the standard Unix
* permissions for this file/directory.
*/
fileMode?: number;
/**
* Data modification time of the file at the time it was archived. It
* represents the integer number of seconds since January 1, 1970, 00:00 UTC.
*/
mtime?: number;
/**
* Numeric user ID of the file owner. This is ignored if the operating system
* does not support numeric user IDs.
*/
uid?: number;
/**
* Numeric group ID of the file owner. This is ignored if the operating
* system does not support numeric group IDs.
*/
gid?: number;
/** The name of the file owner. */
owner?: string;
/** The group that the file owner belongs to. */
group?: string;
/**
* The type of file archived.
*
* @see {@linkcode FileTypes}
*/
type?: string;
}
/**
* Options describing content to append to an archive. The content comes
* either from a file path or from a {@linkcode Reader}, and the metadata
* fields of {@linkcode TarInfo} are inherited.
*/
export interface TarOptions extends TarInfo {
/**
* Filepath of the file to append to the archive.
*/
filePath?: string;
/**
* A Reader of any arbitrary content to append to the archive.
*/
reader?: Reader;
/**
* Size of the content to be appended. This is only required
* when passing a reader to the archive.
*/
contentSize?: number;
}
/**
* Base interface for {@linkcode TarMetaWithLinkName}. Extends
* {@linkcode TarInfo} with the name (required) and size (optional) of an
* archive member.
*/
export interface TarMeta extends TarInfo {
/**
* The name of the file, with directory names (if any) preceding the file
* name, separated by slashes.
*/
fileName: string;
/**
* The size of the file in bytes; for archive members that are symbolic or
* hard links to another file, this field is specified as zero.
*/
fileSize?: number;
}
/** The type of file archived. */
export enum FileTypes {
"file" = 0,
"link" = 1,
@ -138,6 +154,9 @@ export const USTAR_STRUCTURE = [
},
] as const;
/**
* Union of the `field` values declared by the entries of
* {@linkcode USTAR_STRUCTURE}. It is derived from the constant, so it stays
* in sync with the structure definition.
*
* @internal
*/
export type UstarFields = (typeof USTAR_STRUCTURE)[number]["field"];
export async function readBlock(

View File

@ -34,17 +34,33 @@
* archive file, while untar is the inverse utility to extract the files from an
* archive. Files are not compressed, only collected into the archive.
*
* ### File format and limitations
* ```ts
* import { Tar } from "https://deno.land/std@$STD_VERSION/archive/tar.ts";
* import { Buffer } from "https://deno.land/std@$STD_VERSION/io/buffer.ts";
* import { copy } from "https://deno.land/std@$STD_VERSION/io/copy.ts";
*
* The ustar file format is used for creating the archive file.
* While this format is compatible with most tar readers,
* the format has several limitations, including:
* * Files must be smaller than 8GiB
* * Filenames (including path) must be shorter than 256 characters
* * Filenames (including path) cannot contain non-ASCII characters
* * Sparse files are not supported
* In addition to the ustar format, untar may also read from the pax format.
* However, additional features, such as longer filenames, may be ignored.
* const tar = new Tar();
*
* // Now that we've created our tar, let's add some files to it:
*
* const content = new TextEncoder().encode("Some arbitrary content");
* await tar.append("deno.txt", {
* reader: new Buffer(content),
* contentSize: content.byteLength,
* });
*
* // This file is sourced from the filesystem (and renamed in the archive)
* await tar.append("filename_in_archive.txt", {
* filePath: "./filename_on_filesystem.txt",
* });
*
* // Now let's write the tar (with its two files) to the filesystem
* // use tar.getReader() to read the contents.
*
* const writer = await Deno.open("./out.tar", { write: true, create: true });
* await copy(tar.getReader(), writer);
* writer.close();
* ```
*
* @module
*/

View File

@ -32,7 +32,6 @@ import {
FileTypes,
type TarInfo,
type TarMeta,
type TarOptions,
USTAR_STRUCTURE,
} from "./_common.ts";
import type { Reader } from "../io/types.ts";
@ -41,7 +40,26 @@ import { Buffer } from "../io/buffer.ts";
import { assert } from "../assert/assert.ts";
import { HEADER_LENGTH } from "./_common.ts";
export type { TarInfo, TarMeta, TarOptions };
export type { TarInfo, TarMeta };
/**
* Options for {@linkcode Tar.append}. In addition to the metadata fields
* inherited from {@linkcode TarInfo}, these describe where the content to
* append comes from.
*/
export interface TarOptions extends TarInfo {
/**
* Filepath of the file to append to the archive.
*/
filePath?: string;
/**
* A Reader of any arbitrary content to append to the archive.
*/
reader?: Reader;
/**
* Size of the content to be appended. This is only required
* when passing a reader to the archive.
*/
contentSize?: number;
}
const USTAR_MAGIC_HEADER = "ustar\u000000" as const;
@ -95,28 +113,61 @@ function formatHeader(data: TarData): Uint8Array {
return buffer;
}
/**
* Base interface for {@linkcode TarDataWithSource}. Every field is an
* optional string; this is the shape accepted by `formatHeader`.
*/
export interface TarData {
/** Name of the file, excluding directory names (if any). */
fileName?: string;
/** Directory names preceding the file name (if any). */
fileNamePrefix?: string;
/**
* The underlying raw `st_mode` bits that contain the standard Unix
* permissions for this file/directory.
*/
fileMode?: string;
/**
* Numeric user ID of the file owner. This is ignored if the operating system
* does not support numeric user IDs.
*/
uid?: string;
/**
* Numeric group ID of the file owner. This is ignored if the operating
* system does not support numeric group IDs.
*/
gid?: string;
/**
* The size of the file in bytes; for archive members that are symbolic or
* hard links to another file, this field is specified as zero.
*/
fileSize?: string;
/**
* Data modification time of the file at the time it was archived. It
* represents the integer number of seconds since January 1, 1970, 00:00 UTC.
*/
mtime?: string;
/** The simple sum of all bytes in the header block. */
checksum?: string;
/**
* The type of file archived.
*
* @see {@linkcode FileTypes}
*/
type?: string;
/** Ustar magic header. */
ustar?: string;
/** The name of the file owner. */
owner?: string;
/** The group that the file owner belongs to. */
group?: string;
}
/**
* Tar data interface for {@linkcode Tar.data}. Extends {@linkcode TarData}
* with the source the entry's content is read from.
*/
export interface TarDataWithSource extends TarData {
/**
* Path of the file to read.
*/
filePath?: string;
/**
* Buffer reader.
*/
reader?: Reader;
}
@ -177,8 +228,10 @@ export interface TarDataWithSource extends TarData {
* ```
*/
export class Tar {
/** Tar data. */
data: TarDataWithSource[];
/** Constructs a new instance. */
constructor() {
this.data = [];
}
@ -190,11 +243,10 @@ export class Tar {
* directory's contents. Directories and subdirectories will be created automatically
* in the archive as required.
*
* @param filenameInArchive file name of the content in the archive
* e.g., test.txt; use slash for directory separators
* @param source details of the source of the content including the
* reference to the content itself and potentially any
* related metadata.
* @param filenameInArchive File name of the content in the archive. E.g.
* `test.txt`. Use slash for directory separators.
* @param source Details of the source of the content including the
* reference to the content itself and potentially any related metadata.
*/
async append(filenameInArchive: string, source: TarOptions) {
if (typeof filenameInArchive !== "string") {
@ -203,7 +255,7 @@ export class Tar {
let fileName = filenameInArchive;
/**
* Ustar format has a limitation of file name length. Specifically:
* Ustar format has a limitation of file name length. Specifically:
* 1. File names can contain at most 255 bytes.
* 2. File names longer than 100 bytes must be split at a directory separator in two parts,
* the first being at most 155 bytes long. So, in most cases file names must be a bit shorter

View File

@ -45,9 +45,11 @@ import type { Reader } from "../io/types.ts";
* symbolic link values without polluting the world of archive writers.
*/
export interface TarMetaWithLinkName extends TarMeta {
/** File name of the symbolic link. */
linkName?: string;
}
/**
* Tar header with raw, unprocessed bytes as values. It has one
* `Uint8Array` entry for each field name in {@linkcode UstarFields}.
*/
export type TarHeader = {
[key in UstarFields]: Uint8Array;
};
@ -83,9 +85,11 @@ function parseHeader(buffer: Uint8Array): TarHeader {
return data;
}
/**
* Tar entry. This empty interface shares its name with the `TarEntry` class,
* so the two declarations merge and the class instance type also carries the
* {@linkcode TarMetaWithLinkName} fields.
*/
// deno-lint-ignore no-empty-interface
export interface TarEntry extends TarMetaWithLinkName {}
/** Contains tar header metadata and a reader to the entry's body. */
export class TarEntry implements Reader {
#header: TarHeader;
#reader: Reader | (Reader & Deno.Seeker);
@ -93,6 +97,8 @@ export class TarEntry implements Reader {
#read = 0;
#consumed = false;
#entrySize: number;
/** Constructs a new instance. */
constructor(
meta: TarMetaWithLinkName,
header: TarHeader,
@ -109,10 +115,19 @@ export class TarEntry implements Reader {
this.#entrySize = blocks * HEADER_LENGTH;
}
/** Returns whether the entry has already been consumed. */
get consumed(): boolean {
return this.#consumed;
}
/**
* Reads up to `p.byteLength` bytes of the tar entry into `p`. It resolves to
* the number of bytes read (`0 < n <= p.byteLength`) and rejects if any
* error is encountered. Even if read() resolves to n < p.byteLength, it may use
* all of `p` as scratch space during the call. If some data is available but
* not `p.byteLength` bytes, read() conventionally resolves to what is available
* instead of waiting for more.
*/
async read(p: Uint8Array): Promise<number | null> {
// Bytes left for entry
const entryBytesLeft = this.#entrySize - this.#read;
@ -144,6 +159,7 @@ export class TarEntry implements Reader {
return offset < 0 ? n - Math.abs(offset) : offset;
}
/** Discards the current entry. */
async discard() {
// Discard current entry
if (this.#consumed) return;
@ -208,10 +224,13 @@ export class TarEntry implements Reader {
* ```
*/
export class Untar {
/** Internal reader. */
reader: Reader;
/** Internal block. */
block: Uint8Array;
#entry: TarEntry | undefined;
/** Constructs a new instance. */
constructor(reader: Reader) {
this.reader = reader;
this.block = new Uint8Array(HEADER_LENGTH);