mirror of
https://github.com/zebrajr/node.git
synced 2026-01-15 12:15:26 +00:00
util: add fast path for utf8 encoding
Co-authored-by: Anna Henningsen <anna@addaleax.net> PR-URL: https://github.com/nodejs/node/pull/45412 Reviewed-By: Anna Henningsen <anna@addaleax.net> Reviewed-By: Rich Trott <rtrott@gmail.com> Reviewed-By: Santiago Gimeno <santiago.gimeno@gmail.com>
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
// https://encoding.spec.whatwg.org
|
||||
|
||||
const {
|
||||
Boolean,
|
||||
ObjectCreate,
|
||||
ObjectDefineProperties,
|
||||
ObjectGetOwnPropertyDescriptors,
|
||||
@@ -28,6 +29,8 @@ const kFlags = Symbol('flags');
|
||||
// Symbol key under which a decoder stores the encoding name it was created
// with (the constructor compares that name against 'utf-8').
const kEncoding = Symbol('encoding');
|
||||
// Symbol key set to `true` on decoder instances by the constructor.
// NOTE(review): presumably what validateDecoder() checks - confirm.
const kDecoder = Symbol('decoder');
|
||||
// NOTE(review): presumably the matching marker for encoder instances; its
// use is not visible in this hunk - confirm.
const kEncoder = Symbol('encoder');
|
||||
// Symbol key for the flag that lets decode() call the native decodeUTF8()
// directly. The constructor sets it when the encoding is 'utf-8' and the
// `fatal` option is not set; decode() clears it once called with `stream`.
const kUTF8FastPath = Symbol('kUTF8FastPath');
|
||||
// Symbol key for Boolean(options?.ignoreBOM), forwarded to decodeUTF8().
const kIgnoreBOM = Symbol('kIgnoreBOM');
|
||||
|
||||
const {
|
||||
getConstructorOf,
|
||||
@@ -49,7 +52,8 @@ const {
|
||||
|
||||
const {
|
||||
encodeInto,
|
||||
encodeUtf8String
|
||||
encodeUtf8String,
|
||||
decodeUTF8,
|
||||
} = internalBinding('buffer');
|
||||
|
||||
let Buffer;
|
||||
@@ -397,19 +401,40 @@ function makeTextDecoderICU() {
|
||||
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
|
||||
}
|
||||
|
||||
const handle = getConverter(enc, flags);
|
||||
if (handle === undefined)
|
||||
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
|
||||
// Only support fast path for UTF-8 without FATAL flag
|
||||
const fastPathAvailable = enc === 'utf-8' && !(options?.fatal);
|
||||
|
||||
this[kDecoder] = true;
|
||||
this[kHandle] = handle;
|
||||
this[kFlags] = flags;
|
||||
this[kEncoding] = enc;
|
||||
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
|
||||
this[kUTF8FastPath] = fastPathAvailable;
|
||||
this[kHandle] = undefined;
|
||||
|
||||
if (!fastPathAvailable) {
|
||||
this.#prepareConverter();
|
||||
}
|
||||
}
|
||||
|
||||
#prepareConverter() {
|
||||
if (this[kHandle] !== undefined) return;
|
||||
const handle = getConverter(this[kEncoding], this[kFlags]);
|
||||
if (handle === undefined)
|
||||
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
|
||||
this[kHandle] = handle;
|
||||
}
|
||||
|
||||
decode(input = empty, options = kEmptyObject) {
|
||||
validateDecoder(this);
|
||||
|
||||
this[kUTF8FastPath] &&= !(options?.stream);
|
||||
|
||||
if (this[kUTF8FastPath]) {
|
||||
return decodeUTF8(input, this[kIgnoreBOM]);
|
||||
}
|
||||
|
||||
this.#prepareConverter();
|
||||
|
||||
validateObject(options, 'options', {
|
||||
nullable: true,
|
||||
allowArray: true,
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "node_blob.h"
|
||||
#include "node_errors.h"
|
||||
#include "node_external_reference.h"
|
||||
#include "node_i18n.h"
|
||||
#include "node_internals.h"
|
||||
|
||||
#include "env-inl.h"
|
||||
@@ -565,6 +566,48 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) {
|
||||
args.GetReturnValue().Set(ret);
|
||||
}
|
||||
|
||||
// Convert the input into an encoded string
|
||||
void DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
|
||||
Environment* env = Environment::GetCurrent(args); // list, flags
|
||||
|
||||
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
|
||||
args[0]->IsArrayBufferView())) {
|
||||
return node::THROW_ERR_INVALID_ARG_TYPE(
|
||||
env->isolate(),
|
||||
"The \"list\" argument must be an instance of SharedArrayBuffer, "
|
||||
"ArrayBuffer or ArrayBufferView.");
|
||||
}
|
||||
|
||||
ArrayBufferViewContents<char> buffer(args[0]);
|
||||
|
||||
CHECK(args[1]->IsBoolean());
|
||||
bool ignore_bom = args[1]->IsTrue();
|
||||
|
||||
const char* data = buffer.data();
|
||||
size_t length = buffer.length();
|
||||
|
||||
if (!ignore_bom && length >= 3) {
|
||||
if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
|
||||
data += 3;
|
||||
length -= 3;
|
||||
}
|
||||
}
|
||||
|
||||
if (length == 0) return args.GetReturnValue().SetEmptyString();
|
||||
|
||||
Local<Value> error;
|
||||
MaybeLocal<Value> maybe_ret =
|
||||
StringBytes::Encode(env->isolate(), data, length, UTF8, &error);
|
||||
Local<Value> ret;
|
||||
|
||||
if (!maybe_ret.ToLocal(&ret)) {
|
||||
CHECK(!error.IsEmpty());
|
||||
env->isolate()->ThrowException(error);
|
||||
return;
|
||||
}
|
||||
|
||||
args.GetReturnValue().Set(ret);
|
||||
}
|
||||
|
||||
// bytesCopied = copy(buffer, target[, targetStart][, sourceStart][, sourceEnd])
|
||||
void Copy(const FunctionCallbackInfo<Value> &args) {
|
||||
@@ -1282,6 +1325,7 @@ void Initialize(Local<Object> target,
|
||||
|
||||
SetMethod(context, target, "setBufferPrototype", SetBufferPrototype);
|
||||
SetMethodNoSideEffect(context, target, "createFromString", CreateFromString);
|
||||
SetMethodNoSideEffect(context, target, "decodeUTF8", DecodeUTF8);
|
||||
|
||||
SetMethodNoSideEffect(context, target, "byteLengthUtf8", ByteLengthUtf8);
|
||||
SetMethod(context, target, "copy", Copy);
|
||||
@@ -1339,6 +1383,7 @@ void Initialize(Local<Object> target,
|
||||
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
|
||||
registry->Register(SetBufferPrototype);
|
||||
registry->Register(CreateFromString);
|
||||
registry->Register(DecodeUTF8);
|
||||
|
||||
registry->Register(ByteLengthUtf8);
|
||||
registry->Register(Copy);
|
||||
|
||||
@@ -113,7 +113,7 @@ if (common.hasIntl) {
|
||||
' fatal: false,\n' +
|
||||
' ignoreBOM: true,\n' +
|
||||
' [Symbol(flags)]: 4,\n' +
|
||||
' [Symbol(handle)]: Converter {}\n' +
|
||||
' [Symbol(handle)]: undefined\n' +
|
||||
'}'
|
||||
);
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user