// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
// This module is browser compatible.
|
2023-11-05 12:49:00 +00:00
|
|
|
import { levenshteinDistance } from "./levenshtein_distance.ts";
|
|
|
|
|
2024-06-11 07:34:14 +00:00
|
|
|
/** Options for {@linkcode compareSimilarity}. */
export interface CompareSimilarityOptions {
  /**
   * Whether letter case is taken into account when measuring distance.
   *
   * When this is not `true`, the reference word and both compared strings are
   * lower-cased before being handed to the distance function.
   *
   * @default {false}
   */
  caseSensitive?: boolean;
  /**
   * A custom function used to measure the distance between two strings.
   * Smaller return values mean the strings are more similar.
   *
   * The first argument is always the reference word; the second is the
   * candidate being ranked. In case-insensitive mode both arguments have
   * already been lower-cased.
   *
   * @param a The first string for comparison.
   * @param b The second string for comparison.
   * @returns The distance between the two strings.
   * @default {levenshteinDistance}
   */
  compareFn?: (a: string, b: string) => number;
}
|
|
|
|
|
2023-11-05 12:49:00 +00:00
|
|
|
/**
 * Takes a string and generates a comparator function to determine which of two
 * strings is more similar to the given one.
 *
 * By default, calculates the distance between words using the
 * {@link https://en.wikipedia.org/wiki/Levenshtein_distance | Levenshtein distance}.
 * A different metric can be supplied via `options.compareFn`.
 *
 * Unless `options.caseSensitive` is `true`, `givenWord` and both compared
 * strings are lower-cased before being passed to the distance function.
 *
 * @param givenWord The string to measure distance against.
 * @param options Options for the sort.
 * @returns A comparator function `(a, b) => number`. The comparator returns
 * the difference in distance: a negative number if `a` is more similar to
 * `givenWord` than `b`, a positive number if `b` is more similar, or `0` if
 * they are equally similar.
 *
 * @example Usage
 *
 * Most-similar words will be at the start of the array.
 *
 * ```ts
 * import { compareSimilarity } from "@std/text/compare-similarity";
 * import { assertEquals } from "@std/assert";
 *
 * const words = ["hi", "hello", "help"];
 * const sortedWords = words.toSorted(compareSimilarity("hep"));
 *
 * assertEquals(sortedWords, ["help", "hi", "hello"]);
 * ```
 */
export function compareSimilarity(
  givenWord: string,
  options?: CompareSimilarityOptions,
): (a: string, b: string) => number {
  // Spreading tolerates `options` being undefined; the default only applies
  // when no `compareFn` was supplied.
  const { compareFn = levenshteinDistance } = { ...options };

  if (options?.caseSensitive) {
    return (a: string, b: string) =>
      compareFn(givenWord, a) - compareFn(givenWord, b);
  }

  // Lower-case the reference word once, up front, rather than on every
  // comparator call; a local is used so the parameter is not reassigned.
  const reference = givenWord.toLowerCase();
  return (a: string, b: string) =>
    compareFn(reference, a.toLowerCase()) -
    compareFn(reference, b.toLowerCase());
}
|