mirror of
https://github.com/nodejs/node.git
synced 2024-11-21 10:59:27 +00:00
net: use icu's punycode implementation
ICU has a punycode implementation built in. Use it instead of the JavaScript implementation because it's much faster. PR-URL: https://github.com/nodejs/node/pull/7355 Reviewed-By: Trevor Norris <trev.norris@gmail.com> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
This commit is contained in:
parent
3d6a01ed3e
commit
7de59ef925
75
benchmark/net/punycode.js
Normal file
75
benchmark/net/punycode.js
Normal file
@ -0,0 +1,75 @@
|
||||
'use strict';
|
||||
|
||||
const common = require('../common.js');
|
||||
const icu = process.binding('icu');
|
||||
const punycode = require('punycode');
|
||||
|
||||
// Internationalized hostnames used as the `val` inputs of the benchmark.
const hostnames = [
  'افغانستا.icom.museum',
  'الجزائر.icom.museum',
  'österreich.icom.museum',
  'বাংলাদেশ.icom.museum',
  'беларусь.icom.museum',
  'belgië.icom.museum',
  'българия.icom.museum',
  'تشادر.icom.museum',
  '中国.icom.museum',
  'القمر.icom.museum',
  'κυπρος.icom.museum',
  'českárepublika.icom.museum',
  'مصر.icom.museum',
  'ελλάδα.icom.museum',
  'magyarország.icom.museum',
  'ísland.icom.museum',
  'भारत.icom.museum',
  'ايران.icom.museum',
  'éire.icom.museum',
  'איקו״ם.ישראל.museum',
  '日本.icom.museum',
  'الأردن.icom.museum'
];

// Benchmark definition: main() reads one `method`, `n` and `val` from the
// `conf` object it is called with.
const bench = common.createBenchmark(main, {
  method: ['punycode', 'icu'],
  n: [1024],
  val: hostnames
});
|
||||
|
||||
/**
 * Round-trips `val` through the JavaScript `punycode` module: encodes it
 * with toASCII() and decodes that result with toUnicode().
 * The decoded string is discarded; nothing is returned.
 *
 * @param {string} val - Hostname to convert.
 */
function usingPunycode(val) {
  const encoded = punycode.toASCII(val);
  punycode.toUnicode(encoded);
}
|
||||
|
||||
/**
 * Round-trips `val` through the `icu` binding: encodes it with toASCII()
 * and decodes that result with toUnicode().
 * The decoded string is discarded; nothing is returned.
 *
 * @param {string} val - Hostname to convert.
 */
function usingICU(val) {
  const encoded = icu.toASCII(val);
  icu.toUnicode(encoded);
}
|
||||
|
||||
/**
 * Benchmarks the JavaScript punycode round trip.
 *
 * Hands usingPunycode to common.v8ForceOptimization() before the timer is
 * started, then calls usingPunycode(val) `n` times between bench.start()
 * and bench.end(n).
 *
 * @param {number} n - Number of round trips to run.
 * @param {string} val - Hostname passed to every call.
 */
function runPunycode(n, val) {
  common.v8ForceOptimization(usingPunycode, val);

  bench.start();
  for (let count = 0; count < n; count += 1) {
    usingPunycode(val);
  }
  bench.end(n);
}
|
||||
|
||||
/**
 * Benchmarks the ICU punycode round trip.
 *
 * Hands usingICU to common.v8ForceOptimization() before the timer is
 * started, then calls usingICU(val) `n` times between bench.start() and
 * bench.end(n).
 *
 * @param {number} n - Number of round trips to run.
 * @param {string} val - Hostname passed to every call.
 */
function runICU(n, val) {
  common.v8ForceOptimization(usingICU, val);

  bench.start();
  for (let count = 0; count < n; count += 1) {
    usingICU(val);
  }
  bench.end(n);
}
|
||||
|
||||
/**
 * Benchmark entry point: dispatches to the runner selected by `conf.method`.
 *
 * @param {object} conf - Benchmark configuration.
 * @param {string} conf.method - 'punycode' or 'icu'.
 * @param {number|string} conf.n - Iteration count; coerced to a number.
 * @param {string} conf.val - Hostname to convert.
 * @throws {Error} 'Unexpected method' when `conf.method` is neither value.
 */
function main(conf) {
  const iterations = +conf.n;
  const hostname = conf.val;
  const method = conf.method;

  if (method === 'punycode') {
    runPunycode(iterations, hostname);
    return;
  }

  if (method === 'icu') {
    runICU(iterations, hostname);
    return;
  }

  throw new Error('Unexpected method');
}
|
12
lib/url.js
12
lib/url.js
@ -1,6 +1,14 @@
|
||||
'use strict';
|
||||
|
||||
const punycode = require('punycode');
|
||||
function importPunycode() {
|
||||
try {
|
||||
return process.binding('icu');
|
||||
} catch (e) {
|
||||
return require('punycode');
|
||||
}
|
||||
}
|
||||
|
||||
const { toASCII } = importPunycode();
|
||||
|
||||
exports.parse = urlParse;
|
||||
exports.resolve = urlResolve;
|
||||
@ -309,7 +317,7 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
|
||||
// It only converts parts of the domain name that
|
||||
// have non-ASCII characters, i.e. it doesn't matter if
|
||||
// you call it with a domain that already is ASCII-only.
|
||||
this.hostname = punycode.toASCII(this.hostname);
|
||||
this.hostname = toASCII(this.hostname);
|
||||
}
|
||||
|
||||
var p = this.port ? ':' + this.port : '';
|
||||
|
132
src/node_i18n.cc
132
src/node_i18n.cc
@ -23,8 +23,16 @@
|
||||
|
||||
#if defined(NODE_HAVE_I18N_SUPPORT)
|
||||
|
||||
#include "node.h"
|
||||
#include "env.h"
|
||||
#include "env-inl.h"
|
||||
#include "util.h"
|
||||
#include "util-inl.h"
|
||||
#include "v8.h"
|
||||
|
||||
#include <unicode/putil.h>
|
||||
#include <unicode/udata.h>
|
||||
#include <unicode/uidna.h>
|
||||
|
||||
#ifdef NODE_HAVE_SMALL_ICU
|
||||
/* if this is defined, we have a 'secondary' entry point.
|
||||
@ -43,6 +51,13 @@ extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[];
|
||||
|
||||
namespace node {
|
||||
|
||||
using v8::Context;
|
||||
using v8::FunctionCallbackInfo;
|
||||
using v8::Local;
|
||||
using v8::Object;
|
||||
using v8::String;
|
||||
using v8::Value;
|
||||
|
||||
bool flag_icu_data_dir = false;
|
||||
|
||||
namespace i18n {
|
||||
@ -64,7 +79,124 @@ bool InitializeICUDirectory(const char* icu_data_path) {
|
||||
}
|
||||
}
|
||||
|
||||
// Converts the UTF-8 domain name `input` (`length` bytes) to its Unicode
// form with ICU's UTS #46 API, writing the UTF-8 result into `buf`.
// Returns the length reported by ICU, or -1 if the UTS #46 instance cannot
// be opened or the conversion finishes with a failing status.
static int32_t ToUnicode(MaybeStackBuffer<char>* buf,
                         const char* input,
                         size_t length) {
  UErrorCode status = U_ZERO_ERROR;
  const uint32_t options = UIDNA_DEFAULT | UIDNA_NONTRANSITIONAL_TO_UNICODE;
  UIDNA* uidna = uidna_openUTS46(options, &status);
  if (U_FAILURE(status))
    return -1;

  UIDNAInfo info = UIDNA_INFO_INITIALIZER;

  // A single conversion attempt into the storage `buf` currently exposes.
  auto convert = [&]() {
    return uidna_nameToUnicodeUTF8(uidna,
                                   input, length,
                                   **buf, buf->length(),
                                   &info,
                                   &status);
  };

  int32_t len = convert();
  if (status == U_BUFFER_OVERFLOW_ERROR) {
    // The output did not fit: grow the buffer to the reported length and
    // run the conversion once more with a clean status.
    status = U_ZERO_ERROR;
    buf->AllocateSufficientStorage(len);
    len = convert();
  }

  uidna_close(uidna);
  return U_FAILURE(status) ? -1 : len;
}
|
||||
|
||||
// Converts the UTF-8 domain name `input` (`length` bytes) to its ASCII
// form with ICU's UTS #46 API, writing the result into `buf`.
// Returns the length reported by ICU, or -1 if the UTS #46 instance cannot
// be opened or the conversion finishes with a failing status.
static int32_t ToASCII(MaybeStackBuffer<char>* buf,
                       const char* input,
                       size_t length) {
  UErrorCode status = U_ZERO_ERROR;
  const uint32_t options = UIDNA_DEFAULT | UIDNA_NONTRANSITIONAL_TO_ASCII;
  UIDNA* uidna = uidna_openUTS46(options, &status);
  if (U_FAILURE(status))
    return -1;

  UIDNAInfo info = UIDNA_INFO_INITIALIZER;

  // A single conversion attempt into the storage `buf` currently exposes.
  auto convert = [&]() {
    return uidna_nameToASCII_UTF8(uidna,
                                  input, length,
                                  **buf, buf->length(),
                                  &info,
                                  &status);
  };

  int32_t len = convert();
  if (status == U_BUFFER_OVERFLOW_ERROR) {
    // The output did not fit: grow the buffer to the reported length and
    // run the conversion once more with a clean status.
    status = U_ZERO_ERROR;
    buf->AllocateSufficientStorage(len);
    len = convert();
  }

  uidna_close(uidna);
  return U_FAILURE(status) ? -1 : len;
}
|
||||
|
||||
static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
|
||||
Environment* env = Environment::GetCurrent(args);
|
||||
CHECK_GE(args.Length(), 1);
|
||||
CHECK(args[0]->IsString());
|
||||
Utf8Value val(env->isolate(), args[0]);
|
||||
MaybeStackBuffer<char> buf;
|
||||
int32_t len = ToUnicode(&buf, *val, val.length());
|
||||
|
||||
if (len < 0) {
|
||||
return env->ThrowError("Cannot convert name to Unicode");
|
||||
}
|
||||
|
||||
args.GetReturnValue().Set(
|
||||
String::NewFromUtf8(env->isolate(),
|
||||
*buf,
|
||||
v8::NewStringType::kNormal,
|
||||
len).ToLocalChecked());
|
||||
}
|
||||
|
||||
static void ToASCII(const FunctionCallbackInfo<Value>& args) {
|
||||
Environment* env = Environment::GetCurrent(args);
|
||||
CHECK_GE(args.Length(), 1);
|
||||
CHECK(args[0]->IsString());
|
||||
Utf8Value val(env->isolate(), args[0]);
|
||||
MaybeStackBuffer<char> buf;
|
||||
int32_t len = ToASCII(&buf, *val, val.length());
|
||||
|
||||
if (len < 0) {
|
||||
return env->ThrowError("Cannot convert name to ASCII");
|
||||
}
|
||||
|
||||
args.GetReturnValue().Set(
|
||||
String::NewFromUtf8(env->isolate(),
|
||||
*buf,
|
||||
v8::NewStringType::kNormal,
|
||||
len).ToLocalChecked());
|
||||
}
|
||||
|
||||
// Initializer registered for the `icu` builtin module: attaches the two
// conversion bindings to `target` as toUnicode() and toASCII().
// `unused` and `priv` are not read.
void Init(Local<Object> target,
          Local<Value> unused,
          Local<Context> context,
          void* priv) {
  Environment* const env = Environment::GetCurrent(context);

  env->SetMethod(target, "toUnicode", ToUnicode);
  env->SetMethod(target, "toASCII", ToASCII);
}
|
||||
|
||||
} // namespace i18n
|
||||
} // namespace node
|
||||
|
||||
NODE_MODULE_CONTEXT_AWARE_BUILTIN(icu, node::i18n::Init)
|
||||
|
||||
#endif // NODE_HAVE_I18N_SUPPORT
|
||||
|
72
test/parallel/test-icu-punycode.js
Normal file
72
test/parallel/test-icu-punycode.js
Normal file
@ -0,0 +1,72 @@
|
||||
'use strict';
|
||||
|
||||
const common = require('../common');
|
||||
const icu = getPunycode();
|
||||
const assert = require('assert');
|
||||
|
||||
/**
 * Looks up the `icu` binding.
 *
 * @returns {object|undefined} The result of process.binding('icu'), or
 *   undefined when that lookup throws.
 */
function getPunycode() {
  let binding;
  try {
    binding = process.binding('icu');
  } catch (err) {
    // Lookup failed: leave `binding` undefined so the caller can skip.
  }
  return binding;
}
|
||||
|
||||
if (!icu) {
|
||||
common.skip('icu punycode tests because ICU is not present.');
|
||||
return;
|
||||
}
|
||||
|
||||
// Credit for list: http://www.i18nguy.com/markup/idna-examples.html
|
||||
const tests = [
|
||||
'افغانستا.icom.museum',
|
||||
'الجزائر.icom.museum',
|
||||
'österreich.icom.museum',
|
||||
'বাংলাদেশ.icom.museum',
|
||||
'беларусь.icom.museum',
|
||||
'belgië.icom.museum',
|
||||
'българия.icom.museum',
|
||||
'تشادر.icom.museum',
|
||||
'中国.icom.museum',
|
||||
'القمر.icom.museum',
|
||||
'κυπρος.icom.museum',
|
||||
'českárepublika.icom.museum',
|
||||
'مصر.icom.museum',
|
||||
'ελλάδα.icom.museum',
|
||||
'magyarország.icom.museum',
|
||||
'ísland.icom.museum',
|
||||
'भारत.icom.museum',
|
||||
'ايران.icom.museum',
|
||||
'éire.icom.museum',
|
||||
'איקו״ם.ישראל.museum',
|
||||
'日本.icom.museum',
|
||||
'الأردن.icom.museum',
|
||||
'қазақстан.icom.museum',
|
||||
'한국.icom.museum',
|
||||
'кыргызстан.icom.museum',
|
||||
'ລາວ.icom.museum',
|
||||
'لبنان.icom.museum',
|
||||
'македонија.icom.museum',
|
||||
'موريتانيا.icom.museum',
|
||||
'méxico.icom.museum',
|
||||
'монголулс.icom.museum',
|
||||
'المغرب.icom.museum',
|
||||
'नेपाल.icom.museum',
|
||||
'عمان.icom.museum',
|
||||
'قطر.icom.museum',
|
||||
'românia.icom.museum',
|
||||
'россия.иком.museum',
|
||||
'србијаицрнагора.иком.museum',
|
||||
'இலங்கை.icom.museum',
|
||||
'españa.icom.museum',
|
||||
'ไทย.icom.museum',
|
||||
'تونس.icom.museum',
|
||||
'türkiye.icom.museum',
|
||||
'украина.icom.museum',
|
||||
'việtnam.icom.museum'
|
||||
];
|
||||
|
||||
// Testing the roundtrip: encoding a hostname with toASCII() and decoding the
// result with toUnicode() must give back the original hostname.
// The computed value is passed first and the expected hostname second, so a
// failure reports actual/expected the right way round.
tests.forEach((hostname) => {
  const roundTripped = icu.toUnicode(icu.toASCII(hostname));
  assert.strictEqual(roundTripped, hostname);
});
|
@ -37,8 +37,7 @@
|
||||
'defines': [
|
||||
# ICU cannot swap the initial data without this.
|
||||
# http://bugs.icu-project.org/trac/ticket/11046
|
||||
'UCONFIG_NO_LEGACY_CONVERSION=1',
|
||||
'UCONFIG_NO_IDNA=1',
|
||||
'UCONFIG_NO_LEGACY_CONVERSION=1'
|
||||
],
|
||||
}],
|
||||
],
|
||||
@ -428,9 +427,6 @@
|
||||
#'<(icu_path)/source/common/ubidi_props_data.h',
|
||||
# and the callers
|
||||
'<(icu_path)/source/common/ushape.cpp',
|
||||
'<(icu_path)/source/common/usprep.cpp',
|
||||
'<(icu_path)/source/common/uts46.cpp',
|
||||
'<(icu_path)/source/common/uidna.cpp',
|
||||
]}],
|
||||
[ 'icu_ver_major == 57', { 'sources!': [
|
||||
# work around http://bugs.icu-project.org/trac/ticket/12451
|
||||
@ -447,9 +443,6 @@
|
||||
#'<(icu_path)/source/common/ubidi_props_data.h',
|
||||
# and the callers
|
||||
'<(icu_path)/source/common/ushape.cpp',
|
||||
'<(icu_path)/source/common/usprep.cpp',
|
||||
'<(icu_path)/source/common/uts46.cpp',
|
||||
'<(icu_path)/source/common/uidna.cpp',
|
||||
]}],
|
||||
[ 'OS == "solaris"', { 'defines': [
|
||||
'_XOPEN_SOURCE_EXTENDED=0',
|
||||
|
@ -24,7 +24,7 @@
|
||||
"region": "none",
|
||||
"zone": "locales",
|
||||
"converters": "none",
|
||||
"stringprep": "none",
|
||||
"stringprep": "locales",
|
||||
"translit": "none",
|
||||
"brkfiles": "none",
|
||||
"brkdict": "none",
|
||||
@ -34,7 +34,6 @@
|
||||
"remove": [
|
||||
"cnvalias.icu",
|
||||
"postalCodeData.res",
|
||||
"uts46.nrm",
|
||||
"genderList.res",
|
||||
"brkitr/root.res",
|
||||
"unames.icu"
|
||||
|
Loading…
Reference in New Issue
Block a user