2024-01-01 21:11:32 +00:00
|
|
|
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
|
2024-04-10 02:43:44 +00:00
|
|
|
// This module is browser compatible.
|
2023-11-05 12:49:00 +00:00
|
|
|
import { levenshteinDistance } from "./levenshtein_distance.ts";
|
2024-04-29 02:57:30 +00:00
|
|
|
import { assert } from "@std/assert/assert";
|
2023-11-05 12:49:00 +00:00
|
|
|
|
|
|
|
// NOTE: this metric may change in future versions (e.g. to something better than levenshteinDistance)
|
|
|
|
// Indirection point for the distance metric: `closestString` in this module
// calls this alias rather than `levenshteinDistance` directly, so the metric
// it uses is selected by this single assignment.
const getWordDistance = levenshteinDistance;
|
|
|
|
|
|
|
|
/**
 * Finds the entry of `possibleWords` that is most similar to `givenWord`.
 *
 * Similarity is measured with the module's word-distance metric: the smaller
 * the distance, the closer the match. When several candidates share the
 * smallest distance, the one that appears first in `possibleWords` is
 * returned. The returned string is the original array element (its casing is
 * preserved even when the comparison is case-insensitive).
 *
 * @example Usage
 * ```ts
 * import { closestString } from "@std/text/closest-string";
 *
 * const possibleWords: string[] = ["length", "size", "blah", "help"];
 *
 * // case-insensitive by default
 * const word = closestString("hep", possibleWords);
 * ```
 *
 * @param givenWord The string to measure distance against
 * @param possibleWords The candidate strings to choose from. Must contain at
 * least one entry. The array is not modified.
 * @param options An options bag containing a `caseSensitive` flag indicating
 * whether the distance should include case. Default is false.
 * @returns The element of `possibleWords` that is closest to `givenWord`
 * @throws Via `assert` when `possibleWords` is empty.
 * @note
 * the selected word may change with version-updates
 * e.g. word-distance metric may change (improve)
 * use a named-distance (e.g. levenshteinDistance) to
 * guarantee a particular result
 */
export function closestString(
  givenWord: string,
  possibleWords: ReadonlyArray<string>,
  options?: {
    caseSensitive?: boolean;
  },
): string {
  assert(
    possibleWords.length > 0,
    `When using closestString(), the possibleWords array must contain at least one word`,
  );
  const caseSensitive = options?.caseSensitive ?? false;

  // Normalize the reference word once, without reassigning the parameter.
  const target = caseSensitive ? givenWord : givenWord.toLowerCase();

  // The assertion above guarantees index 0 exists.
  let nearestWord = possibleWords[0]!;
  let closestStringDistance = Infinity;
  for (const each of possibleWords) {
    const candidate = caseSensitive ? each : each.toLowerCase();
    const distance = getWordDistance(target, candidate);
    // Strict `<` keeps the earliest candidate when distances tie.
    if (distance < closestStringDistance) {
      nearestWord = each;
      closestStringDistance = distance;
    }
  }
  // this distance metric could be swapped/improved in the future
  return nearestWord;
}
|