buffer: use simdutf convert_latin1_to_utf8_safe

simdutf 5.5 includes convert_latin1_to_utf8_safe

PR-URL: https://github.com/nodejs/node/pull/54798
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Daniel Lemire <daniel@lemire.me>
This commit is contained in:
Robert Nagy 2024-09-18 20:39:01 +02:00 committed by GitHub
parent f8b7a17146
commit 0d4387ebe2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1440,58 +1440,6 @@ void CopyArrayBuffer(const FunctionCallbackInfo<Value>& args) {
memcpy(dest, src, bytes_to_copy); memcpy(dest, src, bytes_to_copy);
} }
size_t convert_latin1_to_utf8_s(const char* src,
size_t src_len,
char* dst,
size_t dst_len) noexcept {
size_t src_pos = 0;
size_t dst_pos = 0;
const auto safe_len = std::min(src_len, dst_len >> 1);
if (safe_len > 16) {
// convert_latin1_to_utf8 will never write more than input length * 2.
dst_pos += simdutf::convert_latin1_to_utf8(src, safe_len, dst);
src_pos += safe_len;
}
// Based on:
// https://github.com/simdutf/simdutf/blob/master/src/scalar/latin1_to_utf8/latin1_to_utf8.h
// with an upper limit on the number of bytes to write.
const auto src_ptr = reinterpret_cast<const uint8_t*>(src);
const auto dst_ptr = reinterpret_cast<uint8_t*>(dst);
size_t skip_pos = src_pos;
while (src_pos < src_len && dst_pos < dst_len) {
if (skip_pos <= src_pos && src_pos + 16 <= src_len &&
dst_pos + 16 <= dst_len) {
uint64_t v1;
memcpy(&v1, src_ptr + src_pos + 0, 8);
uint64_t v2;
memcpy(&v2, src_ptr + src_pos + 8, 8);
if (((v1 | v2) & UINT64_C(0x8080808080808080)) == 0) {
memcpy(dst_ptr + dst_pos, src_ptr + src_pos, 16);
dst_pos += 16;
src_pos += 16;
} else {
skip_pos = src_pos + 16;
}
} else {
const auto byte = src_ptr[src_pos++];
if ((byte & 0x80) == 0) {
dst_ptr[dst_pos++] = byte;
} else if (dst_pos + 2 <= dst_len) {
dst_ptr[dst_pos++] = (byte >> 6) | 0b11000000;
dst_ptr[dst_pos++] = (byte & 0b111111) | 0b10000000;
} else {
break;
}
}
}
return dst_pos;
}
template <encoding encoding> template <encoding encoding>
uint32_t WriteOneByteString(const char* src, uint32_t WriteOneByteString(const char* src,
uint32_t src_len, uint32_t src_len,
@ -1502,7 +1450,7 @@ uint32_t WriteOneByteString(const char* src,
} }
if (encoding == UTF8) { if (encoding == UTF8) {
return convert_latin1_to_utf8_s(src, src_len, dst, dst_len); return simdutf::convert_latin1_to_utf8_safe(src, src_len, dst, dst_len);
} else if (encoding == LATIN1 || encoding == ASCII) { } else if (encoding == LATIN1 || encoding == ASCII) {
const auto size = std::min(src_len, dst_len); const auto size = std::min(src_len, dst_len);
memcpy(dst, src, size); memcpy(dst, src, size);