crypto: implement crypto.hash()

This patch introduces a helper crypto.hash() that computes
a digest from the input at one shot. This can be 1.2-1.6x faster
than the object-based createHash() for smaller inputs (<= 5MB)
that are readily available (not streamed) and incur less memory
overhead since no intermediate objects will be created.

PR-URL: https://github.com/nodejs/node/pull/51044
Refs: https://github.com/nodejs/performance/issues/136
Reviewed-By: Vinícius Lourenço Claro Cardoso <contact@viniciusl.com.br>
Reviewed-By: Yagiz Nizipli <yagiz.nizipli@sentry.io>
This commit is contained in:
Joyee Cheung 2024-01-05 22:17:00 +01:00
parent 4cae1b987e
commit 4c46439fe9
9 changed files with 272 additions and 8 deletions

View File

@ -0,0 +1,42 @@
'use strict';
const common = require('../common.js');
const { createHash, hash } = require('crypto');
const path = require('path');
const filepath = path.resolve(__dirname, '../../test/fixtures/snapshot/typescript.js');
const fs = require('fs');
const assert = require('assert');
const bench = common.createBenchmark(main, {
length: [1000, 100_000],
method: ['md5', 'sha1', 'sha256'],
type: ['string', 'buffer'],
n: [100_000, 1000],
}, {
combinationFilter: ({ length, n }) => {
return length * n <= 100_000 * 1000;
},
});
function main({ length, type, method, n }) {
let data = fs.readFileSync(filepath);
if (type === 'string') {
data = data.toString().slice(0, length);
} else {
data = Uint8Array.prototype.slice.call(data, 0, length);
}
const oneshotHash = hash ?
(method, input) => hash(method, input, 'hex') :
(method, input) => createHash(method).update(input).digest('hex');
const array = [];
for (let i = 0; i < n; i++) {
array.push(null);
}
bench.start();
for (let i = 0; i < n; i++) {
array[i] = oneshotHash(method, data);
}
bench.end(n);
assert.strictEqual(typeof array[n - 1], 'string');
}

View File

@ -3510,6 +3510,67 @@ Computes the Diffie-Hellman secret based on a `privateKey` and a `publicKey`.
Both keys must have the same `asymmetricKeyType`, which must be one of `'dh'`
(for Diffie-Hellman), `'ec'` (for ECDH), `'x448'`, or `'x25519'` (for ECDH-ES).
### `crypto.hash(algorith, data[, outputEncoding])`
<!-- YAML
added:
- REPLACEME
-->
* `algorithm` {string|undefined}
* `data` {string|ArrayBuffer|Buffer|TypedArray|DataView} When `data` is a
string, it will be encoded as UTF-8 before being hashed. If a different
input encoding is desired for a string input, user could encode the string
into a `TypedArray` using either `TextEncoder` or `Buffer.from()` and passing
the encoded `TypedArray` into this API instead.
* `outputEncoding` {string|undefined} [Encoding][encoding] used to encode the
returned digest. **Default:** `'hex'`.
* Returns: {string|Buffer}
A utility for creating one-shot hash digests of data. It can be faster than
the object-based `crypto.createHash()` when hashing a smaller amount of data
(<= 5MB) that's readily available. If the data can be big or if it is streamed,
it's still recommended to use `crypto.createHash()` instead.
The `algorithm` is dependent on the available algorithms supported by the
version of OpenSSL on the platform. Examples are `'sha256'`, `'sha512'`, etc.
On recent releases of OpenSSL, `openssl list -digest-algorithms` will
display the available digest algorithms.
Example:
```cjs
const crypto = require('node:crypto');
const { Buffer } = require('node:buffer');
// Hashing a string and return the result as a hex-encoded string.
const string = 'Node.js';
// 10b3493287f831e81a438811a1ffba01f8cec4b7
console.log(crypto.hash('sha1', string));
// Encode a base64-encoded string into a Buffer, hash it and return
// the result as a buffer.
const base64 = 'Tm9kZS5qcw==';
// <Buffer 10 b3 49 32 87 f8 31 e8 1a 43 88 11 a1 ff ba 01 f8 ce c4 b7>
console.log(crypto.hash('sha1', Buffer.from(base64, 'base64'), 'buffer'));
```
```mjs
import crypto from 'node:crypto';
import { Buffer } from 'node:buffer';
// Hashing a string and return the result as a hex-encoded string.
const string = 'Node.js';
// 10b3493287f831e81a438811a1ffba01f8cec4b7
console.log(crypto.hash('sha1', string));
// Encode a base64-encoded string into a Buffer, hash it and return
// the result as a buffer.
const base64 = 'Tm9kZS5qcw==';
// <Buffer 10 b3 49 32 87 f8 31 e8 1a 43 88 11 a1 ff ba 01 f8 ce c4 b7>
console.log(crypto.hash('sha1', Buffer.from(base64, 'base64'), 'buffer'));
```
### `crypto.generateKey(type, options, callback)`
<!-- YAML

View File

@ -107,6 +107,7 @@ const {
const {
Hash,
Hmac,
hash,
} = require('internal/crypto/hash');
const {
X509Certificate,
@ -219,6 +220,7 @@ module.exports = {
getFips,
setFips,
verify: verifyOneShot,
hash,
// Classes
Certificate,

View File

@ -3,6 +3,7 @@
const {
ObjectSetPrototypeOf,
ReflectApply,
StringPrototypeToLowerCase,
Symbol,
} = primordials;
@ -11,6 +12,7 @@ const {
HashJob,
Hmac: _Hmac,
kCryptoJobAsync,
oneShotDigest,
} = internalBinding('crypto');
const {
@ -29,6 +31,8 @@ const {
const {
lazyDOMException,
normalizeEncoding,
encodingsMap,
} = require('internal/util');
const {
@ -40,6 +44,7 @@ const {
ERR_CRYPTO_HASH_FINALIZED,
ERR_CRYPTO_HASH_UPDATE_FAILED,
ERR_INVALID_ARG_TYPE,
ERR_INVALID_ARG_VALUE,
},
} = require('internal/errors');
@ -47,6 +52,7 @@ const {
validateEncoding,
validateString,
validateUint32,
validateBuffer,
} = require('internal/validators');
const {
@ -188,8 +194,33 @@ async function asyncDigest(algorithm, data) {
throw lazyDOMException('Unrecognized algorithm name', 'NotSupportedError');
}
function hash(algorithm, input, outputEncoding = 'hex') {
validateString(algorithm, 'algorithm');
if (typeof input !== 'string') {
validateBuffer(input, 'input');
}
let normalized = outputEncoding;
// Fast case: if it's 'hex', we don't need to validate it further.
if (outputEncoding !== 'hex') {
validateString(outputEncoding, 'outputEncoding');
normalized = normalizeEncoding(outputEncoding);
// If the encoding is invalid, normalizeEncoding() returns undefined.
if (normalized === undefined) {
// normalizeEncoding() doesn't handle 'buffer'.
if (StringPrototypeToLowerCase(outputEncoding) === 'buffer') {
normalized = 'buffer';
} else {
throw new ERR_INVALID_ARG_VALUE('outputEncoding', outputEncoding);
}
}
}
return oneShotDigest(algorithm, getCachedHashId(algorithm), getHashCache(),
input, normalized, encodingsMap[normalized]);
}
module.exports = {
Hash,
Hmac,
asyncDigest,
hash,
};

View File

@ -109,6 +109,16 @@ enum encoding ParseEncoding(const char* encoding,
return default_encoding;
}
enum encoding ParseEncoding(Isolate* isolate,
Local<Value> encoding_v,
Local<Value> encoding_id,
enum encoding default_encoding) {
if (encoding_id->IsUint32()) {
return static_cast<enum encoding>(encoding_id.As<v8::Uint32>()->Value());
}
return ParseEncoding(isolate, encoding_v, default_encoding);
}
enum encoding ParseEncoding(Isolate* isolate,
Local<Value> encoding_v,

View File

@ -202,6 +202,71 @@ const EVP_MD* GetDigestImplementation(Environment* env,
#endif
}
// crypto.digest(algorithm, algorithmId, algorithmCache,
// input, outputEncoding, outputEncodingId)
void Hash::OneShotDigest(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
Isolate* isolate = env->isolate();
CHECK_EQ(args.Length(), 6);
CHECK(args[0]->IsString()); // algorithm
CHECK(args[1]->IsInt32()); // algorithmId
CHECK(args[2]->IsObject()); // algorithmCache
CHECK(args[3]->IsString() || args[3]->IsArrayBufferView()); // input
CHECK(args[4]->IsString()); // outputEncoding
CHECK(args[5]->IsUint32() || args[5]->IsUndefined()); // outputEncodingId
const EVP_MD* md = GetDigestImplementation(env, args[0], args[1], args[2]);
if (md == nullptr) {
Utf8Value method(isolate, args[0]);
std::string message =
"Digest method " + method.ToString() + " is not supported";
return ThrowCryptoError(env, ERR_get_error(), message.c_str());
}
enum encoding output_enc = ParseEncoding(isolate, args[4], args[5], HEX);
int md_len = EVP_MD_size(md);
unsigned int result_size;
ByteSource::Builder output(md_len);
int success;
// On smaller inputs, EVP_Digest() can be slower than the
// deprecated helpers e.g SHA256_XXX. The speedup may not
// be worth using deprecated APIs, however, so we use
// EVP_Digest(), unless there's a better alternative
// in the future.
// https://github.com/openssl/openssl/issues/19612
if (args[3]->IsString()) {
Utf8Value utf8(isolate, args[3]);
success = EVP_Digest(utf8.out(),
utf8.length(),
output.data<unsigned char>(),
&result_size,
md,
nullptr);
} else {
ArrayBufferViewContents<unsigned char> input(args[3]);
success = EVP_Digest(input.data(),
input.length(),
output.data<unsigned char>(),
&result_size,
md,
nullptr);
}
if (!success) {
return ThrowCryptoError(env, ERR_get_error());
}
Local<Value> error;
MaybeLocal<Value> rc = StringBytes::Encode(
env->isolate(), output.data<char>(), md_len, output_enc, &error);
if (rc.IsEmpty()) {
CHECK(!error.IsEmpty());
env->isolate()->ThrowException(error);
return;
}
args.GetReturnValue().Set(rc.FromMaybe(Local<Value>()));
}
void Hash::Initialize(Environment* env, Local<Object> target) {
Isolate* isolate = env->isolate();
Local<Context> context = env->context();
@ -216,6 +281,7 @@ void Hash::Initialize(Environment* env, Local<Object> target) {
SetMethodNoSideEffect(context, target, "getHashes", GetHashes);
SetMethodNoSideEffect(context, target, "getCachedAliases", GetCachedAliases);
SetMethodNoSideEffect(context, target, "oneShotDigest", OneShotDigest);
HashJob::Initialize(env, target);
@ -229,6 +295,7 @@ void Hash::RegisterExternalReferences(ExternalReferenceRegistry* registry) {
registry->Register(HashDigest);
registry->Register(GetHashes);
registry->Register(GetCachedAliases);
registry->Register(OneShotDigest);
HashJob::RegisterExternalReferences(registry);
@ -294,14 +361,17 @@ bool Hash::HashUpdate(const char* data, size_t len) {
}
void Hash::HashUpdate(const FunctionCallbackInfo<Value>& args) {
Decode<Hash>(args, [](Hash* hash, const FunctionCallbackInfo<Value>& args,
const char* data, size_t size) {
Environment* env = Environment::GetCurrent(args);
if (UNLIKELY(size > INT_MAX))
return THROW_ERR_OUT_OF_RANGE(env, "data is too long");
bool r = hash->HashUpdate(data, size);
args.GetReturnValue().Set(r);
});
Decode<Hash>(args,
[](Hash* hash,
const FunctionCallbackInfo<Value>& args,
const char* data,
size_t size) {
Environment* env = Environment::GetCurrent(args);
if (UNLIKELY(size > INT_MAX))
return THROW_ERR_OUT_OF_RANGE(env, "data is too long");
bool r = hash->HashUpdate(data, size);
args.GetReturnValue().Set(r);
});
}
void Hash::HashDigest(const FunctionCallbackInfo<Value>& args) {

View File

@ -26,6 +26,7 @@ class Hash final : public BaseObject {
static void GetHashes(const v8::FunctionCallbackInfo<v8::Value>& args);
static void GetCachedAliases(const v8::FunctionCallbackInfo<v8::Value>& args);
static void OneShotDigest(const v8::FunctionCallbackInfo<v8::Value>& args);
protected:
static void New(const v8::FunctionCallbackInfo<v8::Value>& args);

View File

@ -446,6 +446,10 @@ v8::HeapProfiler::HeapSnapshotOptions GetHeapSnapshotOptions(
v8::Local<v8::Value> options);
} // namespace heap
enum encoding ParseEncoding(v8::Isolate* isolate,
v8::Local<v8::Value> encoding_v,
v8::Local<v8::Value> encoding_id,
enum encoding default_encoding);
} // namespace node
#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

View File

@ -0,0 +1,43 @@
'use strict';
// This tests crypto.hash() works.
const common = require('../common');
if (!common.hasCrypto)
common.skip('missing crypto');
const assert = require('assert');
const crypto = require('crypto');
const fixtures = require('../common/fixtures');
const fs = require('fs');
// Test errors for invalid arguments.
[undefined, null, true, 1, () => {}, {}].forEach((invalid) => {
assert.throws(() => { crypto.hash(invalid, 'test'); }, { code: 'ERR_INVALID_ARG_TYPE' });
});
[undefined, null, true, 1, () => {}, {}].forEach((invalid) => {
assert.throws(() => { crypto.hash('sha1', invalid); }, { code: 'ERR_INVALID_ARG_TYPE' });
});
[null, true, 1, () => {}, {}].forEach((invalid) => {
assert.throws(() => { crypto.hash('sha1', 'test', invalid); }, { code: 'ERR_INVALID_ARG_TYPE' });
});
assert.throws(() => { crypto.hash('sha1', 'test', 'not an encoding'); }, { code: 'ERR_INVALID_ARG_VALUE' });
// Test that the output of crypto.hash() is the same as crypto.createHash().
const methods = crypto.getHashes();
const input = fs.readFileSync(fixtures.path('utf8_test_text.txt'));
for (const method of methods) {
for (const outputEncoding of ['buffer', 'hex', 'base64', undefined]) {
const oldDigest = crypto.createHash(method).update(input).digest(outputEncoding || 'hex');
const digestFromBuffer = crypto.hash(method, input, outputEncoding);
assert.deepStrictEqual(digestFromBuffer, oldDigest,
`different result from ${method} with encoding ${outputEncoding}`);
const digestFromString = crypto.hash(method, input.toString(), outputEncoding);
assert.deepStrictEqual(digestFromString, oldDigest,
`different result from ${method} with encoding ${outputEncoding}`);
}
}