node-api: add support for UTF-8 and Latin-1 property keys

PR-URL: https://github.com/nodejs/node/pull/52984
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Chengzhong Wu <legendecas@gmail.com>
Reviewed-By: Vladimir Morozov <vmorozov@microsoft.com>
This commit is contained in:
Mert Can Altin 2024-08-31 21:24:53 +03:00 committed by James M Snell
parent b4088f5a53
commit 75e4d0df8f
5 changed files with 224 additions and 129 deletions

View File

@ -3088,6 +3088,54 @@ The native string is copied.
The JavaScript `string` type is described in
[Section 6.1.4][] of the ECMAScript Language Specification.
### Functions to create optimized property keys
Many JavaScript engines including V8 use internalized strings as keys
to set and get property values. They typically use a hash table to create
and lookup such strings. While it adds some cost per key creation, it improves
the performance after that by enabling comparison of string pointers instead
of the whole strings.
If a new JavaScript string is intended to be used as a property key, then for
some JavaScript engines it will be more efficient to use the functions in this
section. Otherwise, use the `napi_create_string_*` or
`node_api_create_external_string_*` family of functions, as there may be
additional overhead in creating/storing strings with the property key
creation methods.
#### `node_api_create_property_key_latin1`
<!-- YAML
added: REPLACEME
-->
> Stability: 1 - Experimental
```c
napi_status NAPI_CDECL node_api_create_property_key_latin1(napi_env env,
const char* str,
size_t length,
napi_value* result);
```
* `[in] env`: The environment that the API is invoked under.
* `[in] str`: Character buffer representing an ISO-8859-1-encoded string.
* `[in] length`: The length of the string in bytes, or `NAPI_AUTO_LENGTH` if it
is null-terminated.
* `[out] result`: A `napi_value` representing an optimized JavaScript `string`
to be used as a property key for objects.
Returns `napi_ok` if the API succeeded.
This API creates an optimized JavaScript `string` value from
an ISO-8859-1-encoded C string to be used as a property key for objects.
The native string is copied. In contrast with `napi_create_string_latin1`,
subsequent calls to this function with the same `str` pointer may benefit from a speedup
in the creation of the requested `napi_value`, depending on the engine.
The JavaScript `string` type is described in
[Section 6.1.4][] of the ECMAScript Language Specification.
#### `node_api_create_property_key_utf16`
<!-- YAML
@ -3118,18 +3166,36 @@ This API creates an optimized JavaScript `string` value from
a UTF16-LE-encoded C string to be used as a property key for objects.
The native string is copied.
Many JavaScript engines including V8 use internalized strings as keys
to set and get property values. They typically use a hash table to create
and lookup such strings. While it adds some cost per key creation, it improves
the performance after that by enabling comparison of string pointers instead
of the whole strings.
The JavaScript `string` type is described in
[Section 6.1.4][] of the ECMAScript Language Specification.
If a new JavaScript string is intended to be used as a property key, then for
some JavaScript engines it will be more efficient to use
the `node_api_create_property_key_utf16` function.
Otherwise, use the `napi_create_string_utf16` or
`node_api_create_external_string_utf16` functions as there may be additional
overhead in creating/storing strings with this method.
#### `node_api_create_property_key_utf8`
<!-- YAML
added: REPLACEME
-->
> Stability: 1 - Experimental
```c
napi_status NAPI_CDECL node_api_create_property_key_utf8(napi_env env,
const char* str,
size_t length,
napi_value* result);
```
* `[in] env`: The environment that the API is invoked under.
* `[in] str`: Character buffer representing a UTF8-encoded string.
* `[in] length`: The length of the string in bytes, or `NAPI_AUTO_LENGTH` if it
is null-terminated.
* `[out] result`: A `napi_value` representing an optimized JavaScript `string`
to be used as a property key for objects.
Returns `napi_ok` if the API succeeded.
This API creates an optimized JavaScript `string` value from
a UTF8-encoded C string to be used as a property key for objects.
The native string is copied.
The JavaScript `string` type is described in
[Section 6.1.4][] of the ECMAScript Language Specification.

View File

@ -114,6 +114,10 @@ node_api_create_external_string_utf16(napi_env env,
#ifdef NAPI_EXPERIMENTAL
// Feature-test macro: defined whenever the property-key creation functions
// below are declared.
#define NODE_API_EXPERIMENTAL_HAS_PROPERTY_KEYS
// Create a JavaScript string intended for use as an object property key from
// a Latin-1 (ISO-8859-1) encoded buffer.
NAPI_EXTERN napi_status NAPI_CDECL node_api_create_property_key_latin1(
napi_env env, const char* str, size_t length, napi_value* result);
// Same as above, for a UTF-8 encoded buffer.
NAPI_EXTERN napi_status NAPI_CDECL node_api_create_property_key_utf8(
napi_env env, const char* str, size_t length, napi_value* result);
// Same as above, for a UTF-16 encoded buffer of char16_t code units.
NAPI_EXTERN napi_status NAPI_CDECL node_api_create_property_key_utf16(
napi_env env, const char16_t* str, size_t length, napi_value* result);
#endif // NAPI_EXPERIMENTAL

View File

@ -1704,6 +1704,30 @@ napi_status NAPI_CDECL node_api_create_external_string_utf16(
});
}
// Creates a JavaScript string from a Latin-1 (one-byte) buffer, requesting an
// internalized V8 string so that it can serve as an optimized property key.
//
// env:    environment the call is made under.
// str:    Latin-1 encoded character buffer.
// length: number of bytes in `str`, or NAPI_AUTO_LENGTH if null-terminated.
// result: receives the created string value.
//
// The shared plumbing lives in v8impl::NewString, which is handed the
// arguments together with the factory lambda below.
//
// NAPI_CDECL is spelled out so the definition carries the same calling
// convention as its NAPI_EXTERN declaration in the public header.
napi_status NAPI_CDECL node_api_create_property_key_latin1(napi_env env,
                                                           const char* str,
                                                           size_t length,
                                                           napi_value* result) {
  return v8impl::NewString(env, str, length, result, [&](v8::Isolate* isolate) {
    // V8 takes the length as an int; make the size_t -> int narrowing
    // explicit instead of relying on an implicit conversion.
    return v8::String::NewFromOneByte(isolate,
                                      reinterpret_cast<const uint8_t*>(str),
                                      v8::NewStringType::kInternalized,
                                      static_cast<int>(length));
  });
}
// Creates a JavaScript string from a UTF-8 buffer, requesting an internalized
// V8 string so that it can serve as an optimized property key.
//
// env:    environment the call is made under.
// str:    UTF-8 encoded character buffer.
// length: number of bytes in `str`, or NAPI_AUTO_LENGTH if null-terminated.
// result: receives the created string value.
//
// The shared plumbing lives in v8impl::NewString, which is handed the
// arguments together with the factory lambda below.
//
// NAPI_CDECL is spelled out so the definition carries the same calling
// convention as its NAPI_EXTERN declaration in the public header.
napi_status NAPI_CDECL node_api_create_property_key_utf8(napi_env env,
                                                         const char* str,
                                                         size_t length,
                                                         napi_value* result) {
  return v8impl::NewString(env, str, length, result, [&](v8::Isolate* isolate) {
    // V8 takes the length as an int, hence the explicit narrowing cast.
    return v8::String::NewFromUtf8(isolate,
                                   str,
                                   v8::NewStringType::kInternalized,
                                   static_cast<int>(length));
  });
}
napi_status NAPI_CDECL node_api_create_property_key_utf16(napi_env env,
const char16_t* str,
size_t length,

View File

@ -4,131 +4,77 @@ const assert = require('assert');
// Testing api calls for string
const test_string = require(`./build/${common.buildType}/test_string`);
// The insufficient buffer test case allocates a buffer of size 4, including
// the null terminator.
const kInsufficientIdx = 3;
const empty = '';
assert.strictEqual(test_string.TestLatin1(empty), empty);
assert.strictEqual(test_string.TestUtf8(empty), empty);
assert.strictEqual(test_string.TestUtf16(empty), empty);
assert.strictEqual(test_string.TestLatin1AutoLength(empty), empty);
assert.strictEqual(test_string.TestUtf8AutoLength(empty), empty);
assert.strictEqual(test_string.TestUtf16AutoLength(empty), empty);
assert.strictEqual(test_string.TestLatin1External(empty), empty);
assert.strictEqual(test_string.TestUtf16External(empty), empty);
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(empty), empty);
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(empty), empty);
assert.strictEqual(test_string.TestPropertyKeyUtf16(empty), empty);
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(empty), empty);
assert.strictEqual(test_string.Utf16Length(empty), 0);
assert.strictEqual(test_string.Utf8Length(empty), 0);
// ASCII-only inputs (including the empty string). They are fed to both the
// Latin-1 and the Unicode test helpers.
const asciiCases = [
'',
'hello world',
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
'?!@#$%^&*()_+-=[]{}/.,<>\'"\\',
];
const str1 = 'hello world';
assert.strictEqual(test_string.TestLatin1(str1), str1);
assert.strictEqual(test_string.TestUtf8(str1), str1);
assert.strictEqual(test_string.TestUtf16(str1), str1);
assert.strictEqual(test_string.TestLatin1AutoLength(str1), str1);
assert.strictEqual(test_string.TestUtf8AutoLength(str1), str1);
assert.strictEqual(test_string.TestUtf16AutoLength(str1), str1);
assert.strictEqual(test_string.TestLatin1External(str1), str1);
assert.strictEqual(test_string.TestUtf16External(str1), str1);
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str1), str1);
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str1), str1);
assert.strictEqual(test_string.TestLatin1Insufficient(str1), str1.slice(0, 3));
assert.strictEqual(test_string.TestUtf8Insufficient(str1), str1.slice(0, 3));
assert.strictEqual(test_string.TestUtf16Insufficient(str1), str1.slice(0, 3));
assert.strictEqual(test_string.TestPropertyKeyUtf16(str1), str1);
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str1), str1);
assert.strictEqual(test_string.Utf16Length(str1), 11);
assert.strictEqual(test_string.Utf8Length(str1), 11);
const latin1Cases = [
{
str: '¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿',
utf8Length: 62,
utf8InsufficientIdx: 1,
},
{
str: 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ',
utf8Length: 126,
utf8InsufficientIdx: 1,
},
];
const str2 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
assert.strictEqual(test_string.TestLatin1(str2), str2);
assert.strictEqual(test_string.TestUtf8(str2), str2);
assert.strictEqual(test_string.TestUtf16(str2), str2);
assert.strictEqual(test_string.TestLatin1AutoLength(str2), str2);
assert.strictEqual(test_string.TestUtf8AutoLength(str2), str2);
assert.strictEqual(test_string.TestUtf16AutoLength(str2), str2);
assert.strictEqual(test_string.TestLatin1External(str2), str2);
assert.strictEqual(test_string.TestUtf16External(str2), str2);
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str2), str2);
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str2), str2);
assert.strictEqual(test_string.TestLatin1Insufficient(str2), str2.slice(0, 3));
assert.strictEqual(test_string.TestUtf8Insufficient(str2), str2.slice(0, 3));
assert.strictEqual(test_string.TestUtf16Insufficient(str2), str2.slice(0, 3));
assert.strictEqual(test_string.TestPropertyKeyUtf16(str2), str2);
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str2), str2);
assert.strictEqual(test_string.Utf16Length(str2), 62);
assert.strictEqual(test_string.Utf8Length(str2), 62);
// Inputs whose code points lie outside the Latin-1 range, so they are only
// run through the Unicode helper. `utf8Length` is the value expected from
// Utf8Length(); `utf8InsufficientIdx` is the end index of the prefix expected
// back from TestUtf8Insufficient().
const unicodeCases = [
{
str: '\u{2003}\u{2101}\u{2001}\u{202}\u{2011}',
utf8Length: 14,
utf8InsufficientIdx: 1,
},
];
const str3 = '?!@#$%^&*()_+-=[]{}/.,<>\'"\\';
assert.strictEqual(test_string.TestLatin1(str3), str3);
assert.strictEqual(test_string.TestUtf8(str3), str3);
assert.strictEqual(test_string.TestUtf16(str3), str3);
assert.strictEqual(test_string.TestLatin1AutoLength(str3), str3);
assert.strictEqual(test_string.TestUtf8AutoLength(str3), str3);
assert.strictEqual(test_string.TestUtf16AutoLength(str3), str3);
assert.strictEqual(test_string.TestLatin1External(str3), str3);
assert.strictEqual(test_string.TestUtf16External(str3), str3);
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str3), str3);
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str3), str3);
assert.strictEqual(test_string.TestLatin1Insufficient(str3), str3.slice(0, 3));
assert.strictEqual(test_string.TestUtf8Insufficient(str3), str3.slice(0, 3));
assert.strictEqual(test_string.TestUtf16Insufficient(str3), str3.slice(0, 3));
assert.strictEqual(test_string.TestPropertyKeyUtf16(str3), str3);
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str3), str3);
assert.strictEqual(test_string.Utf16Length(str3), 27);
assert.strictEqual(test_string.Utf8Length(str3), 27);
function testLatin1Cases(str) {
assert.strictEqual(test_string.TestLatin1(str), str);
assert.strictEqual(test_string.TestLatin1AutoLength(str), str);
assert.strictEqual(test_string.TestLatin1External(str), str);
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str), str);
assert.strictEqual(test_string.TestPropertyKeyLatin1(str), str);
assert.strictEqual(test_string.TestPropertyKeyLatin1AutoLength(str), str);
assert.strictEqual(test_string.Latin1Length(str), str.length);
const str4 = '¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿';
assert.strictEqual(test_string.TestLatin1(str4), str4);
assert.strictEqual(test_string.TestUtf8(str4), str4);
assert.strictEqual(test_string.TestUtf16(str4), str4);
assert.strictEqual(test_string.TestLatin1AutoLength(str4), str4);
assert.strictEqual(test_string.TestUtf8AutoLength(str4), str4);
assert.strictEqual(test_string.TestUtf16AutoLength(str4), str4);
assert.strictEqual(test_string.TestLatin1External(str4), str4);
assert.strictEqual(test_string.TestUtf16External(str4), str4);
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str4), str4);
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str4), str4);
assert.strictEqual(test_string.TestLatin1Insufficient(str4), str4.slice(0, 3));
assert.strictEqual(test_string.TestUtf8Insufficient(str4), str4.slice(0, 1));
assert.strictEqual(test_string.TestUtf16Insufficient(str4), str4.slice(0, 3));
assert.strictEqual(test_string.TestPropertyKeyUtf16(str4), str4);
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str4), str4);
assert.strictEqual(test_string.Utf16Length(str4), 31);
assert.strictEqual(test_string.Utf8Length(str4), 62);
if (str !== '') {
assert.strictEqual(test_string.TestLatin1Insufficient(str), str.slice(0, kInsufficientIdx));
}
}
const str5 = 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ';
assert.strictEqual(test_string.TestLatin1(str5), str5);
assert.strictEqual(test_string.TestUtf8(str5), str5);
assert.strictEqual(test_string.TestUtf16(str5), str5);
assert.strictEqual(test_string.TestLatin1AutoLength(str5), str5);
assert.strictEqual(test_string.TestUtf8AutoLength(str5), str5);
assert.strictEqual(test_string.TestUtf16AutoLength(str5), str5);
assert.strictEqual(test_string.TestLatin1External(str5), str5);
assert.strictEqual(test_string.TestUtf16External(str5), str5);
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str5), str5);
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str5), str5);
assert.strictEqual(test_string.TestLatin1Insufficient(str5), str5.slice(0, 3));
assert.strictEqual(test_string.TestUtf8Insufficient(str5), str5.slice(0, 1));
assert.strictEqual(test_string.TestUtf16Insufficient(str5), str5.slice(0, 3));
assert.strictEqual(test_string.TestPropertyKeyUtf16(str5), str5);
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str5), str5);
assert.strictEqual(test_string.Utf16Length(str5), 63);
assert.strictEqual(test_string.Utf8Length(str5), 126);
function testUnicodeCases(str, utf8Length, utf8InsufficientIdx) {
assert.strictEqual(test_string.TestUtf8(str), str);
assert.strictEqual(test_string.TestUtf16(str), str);
assert.strictEqual(test_string.TestUtf8AutoLength(str), str);
assert.strictEqual(test_string.TestUtf16AutoLength(str), str);
assert.strictEqual(test_string.TestUtf16External(str), str);
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str), str);
assert.strictEqual(test_string.TestPropertyKeyUtf8(str), str);
assert.strictEqual(test_string.TestPropertyKeyUtf8AutoLength(str), str);
assert.strictEqual(test_string.TestPropertyKeyUtf16(str), str);
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str), str);
assert.strictEqual(test_string.Utf8Length(str), utf8Length);
assert.strictEqual(test_string.Utf16Length(str), str.length);
const str6 = '\u{2003}\u{2101}\u{2001}\u{202}\u{2011}';
assert.strictEqual(test_string.TestUtf8(str6), str6);
assert.strictEqual(test_string.TestUtf16(str6), str6);
assert.strictEqual(test_string.TestUtf8AutoLength(str6), str6);
assert.strictEqual(test_string.TestUtf16AutoLength(str6), str6);
assert.strictEqual(test_string.TestUtf16External(str6), str6);
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str6), str6);
assert.strictEqual(test_string.TestUtf8Insufficient(str6), str6.slice(0, 1));
assert.strictEqual(test_string.TestUtf16Insufficient(str6), str6.slice(0, 3));
assert.strictEqual(test_string.TestPropertyKeyUtf16(str6), str6);
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str6), str6);
assert.strictEqual(test_string.Utf16Length(str6), 5);
assert.strictEqual(test_string.Utf8Length(str6), 14);
if (str !== '') {
assert.strictEqual(test_string.TestUtf8Insufficient(str), str.slice(0, utf8InsufficientIdx));
assert.strictEqual(test_string.TestUtf16Insufficient(str), str.slice(0, kInsufficientIdx));
}
}
// ASCII strings are checked by both helpers. For them the expected UTF-8
// length is str.length and the expected insufficient-buffer prefix ends at
// kInsufficientIdx.
asciiCases.forEach(testLatin1Cases);
asciiCases.forEach((str) => testUnicodeCases(str, str.length, kInsufficientIdx));
// Latin-1 strings are also checked by both helpers, using the per-case
// expected UTF-8 length and insufficient-buffer index.
latin1Cases.forEach((it) => testLatin1Cases(it.str));
latin1Cases.forEach((it) => testUnicodeCases(it.str, it.utf8Length, it.utf8InsufficientIdx));
// Strings outside Latin-1 are checked by the Unicode helper only.
unicodeCases.forEach((it) => testUnicodeCases(it.str, it.utf8Length, it.utf8InsufficientIdx));
assert.throws(() => {
test_string.TestLargeUtf8();

View File

@ -293,6 +293,40 @@ static napi_value TestUtf16Insufficient(napi_env env, napi_callback_info info) {
return output;
}
// Test binding for node_api_create_property_key_latin1. Delegates to
// TestOneByteImpl with the Latin-1 reader and the Latin-1 property-key
// creator, using the `actual_length` policy (presumably the measured length
// rather than NAPI_AUTO_LENGTH; confirm against its definition). The JS test
// expects the returned value to equal the input string.
static napi_value TestPropertyKeyLatin1(napi_env env, napi_callback_info info) {
  napi_value key = TestOneByteImpl(env,
                                   info,
                                   napi_get_value_string_latin1,
                                   node_api_create_property_key_latin1,
                                   actual_length);
  return key;
}
// Test binding for node_api_create_property_key_latin1 using the
// `auto_length` policy (presumably NAPI_AUTO_LENGTH; confirm against its
// definition). Delegates to TestOneByteImpl with the Latin-1 reader and the
// Latin-1 property-key creator. The JS test expects the returned value to
// equal the input string.
static napi_value TestPropertyKeyLatin1AutoLength(napi_env env,
                                                  napi_callback_info info) {
  napi_value key = TestOneByteImpl(env,
                                   info,
                                   napi_get_value_string_latin1,
                                   node_api_create_property_key_latin1,
                                   auto_length);
  return key;
}
// Test binding for node_api_create_property_key_utf8. Delegates to
// TestOneByteImpl with the UTF-8 reader and the UTF-8 property-key creator,
// using the `actual_length` policy (presumably the measured length rather
// than NAPI_AUTO_LENGTH; confirm against its definition). The JS test expects
// the returned value to equal the input string.
static napi_value TestPropertyKeyUtf8(napi_env env, napi_callback_info info) {
  napi_value key = TestOneByteImpl(env,
                                   info,
                                   napi_get_value_string_utf8,
                                   node_api_create_property_key_utf8,
                                   actual_length);
  return key;
}
// Test binding for node_api_create_property_key_utf8 using the `auto_length`
// policy (presumably NAPI_AUTO_LENGTH; confirm against its definition).
// Delegates to TestOneByteImpl with the UTF-8 reader and the UTF-8
// property-key creator. The JS test expects the returned value to equal the
// input string.
static napi_value TestPropertyKeyUtf8AutoLength(napi_env env,
                                                napi_callback_info info) {
  napi_value key = TestOneByteImpl(env,
                                   info,
                                   napi_get_value_string_utf8,
                                   node_api_create_property_key_utf8,
                                   auto_length);
  return key;
}
static napi_value TestPropertyKeyUtf16(napi_env env, napi_callback_info info) {
return TestTwoByteImpl(env,
info,
@ -310,6 +344,20 @@ static napi_value TestPropertyKeyUtf16AutoLength(napi_env env,
auto_length);
}
// Returns, as a JavaScript number, the length that
// napi_get_value_string_latin1 reports for the single string argument.
// Each Node-API call is wrapped in NODE_API_CALL for error handling.
static napi_value Latin1Length(napi_env env, napi_callback_info info) {
  napi_value argv[1];
  NODE_API_CALL(env, validate_and_retrieve_single_string_arg(env, info, argv));

  // A NULL buffer with size 0 is passed so that only the length is queried
  // (per the Node-API documentation for napi_get_value_string_latin1).
  size_t latin1_length;
  NODE_API_CALL(
      env,
      napi_get_value_string_latin1(env, argv[0], NULL, 0, &latin1_length));

  napi_value js_length;
  NODE_API_CALL(env,
                napi_create_uint32(env, (uint32_t)latin1_length, &js_length));
  return js_length;
}
static napi_value Utf16Length(napi_env env, napi_callback_info info) {
napi_value args[1];
NODE_API_CALL(env, validate_and_retrieve_single_string_arg(env, info, args));
@ -420,12 +468,19 @@ napi_value Init(napi_env env, napi_value exports) {
DECLARE_NODE_API_PROPERTY("TestUtf16ExternalAutoLength",
TestUtf16ExternalAutoLength),
DECLARE_NODE_API_PROPERTY("TestUtf16Insufficient", TestUtf16Insufficient),
DECLARE_NODE_API_PROPERTY("Latin1Length", Latin1Length),
DECLARE_NODE_API_PROPERTY("Utf16Length", Utf16Length),
DECLARE_NODE_API_PROPERTY("Utf8Length", Utf8Length),
DECLARE_NODE_API_PROPERTY("TestLargeUtf8", TestLargeUtf8),
DECLARE_NODE_API_PROPERTY("TestLargeLatin1", TestLargeLatin1),
DECLARE_NODE_API_PROPERTY("TestLargeUtf16", TestLargeUtf16),
DECLARE_NODE_API_PROPERTY("TestMemoryCorruption", TestMemoryCorruption),
DECLARE_NODE_API_PROPERTY("TestPropertyKeyLatin1", TestPropertyKeyLatin1),
DECLARE_NODE_API_PROPERTY("TestPropertyKeyLatin1AutoLength",
TestPropertyKeyLatin1AutoLength),
DECLARE_NODE_API_PROPERTY("TestPropertyKeyUtf8", TestPropertyKeyUtf8),
DECLARE_NODE_API_PROPERTY("TestPropertyKeyUtf8AutoLength",
TestPropertyKeyUtf8AutoLength),
DECLARE_NODE_API_PROPERTY("TestPropertyKeyUtf16", TestPropertyKeyUtf16),
DECLARE_NODE_API_PROPERTY("TestPropertyKeyUtf16AutoLength",
TestPropertyKeyUtf16AutoLength),