Mirror of https://github.com/zebrajr/node.git (synced 2026-01-15 12:15:26 +00:00)
util: add fast path for text-decoder fatal flag
PR-URL: https://github.com/nodejs/node/pull/45803
Reviewed-By: Robert Nagy <ronagy@icloud.com>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Michael Dawson <midawson@redhat.com>
This commit is contained in:
committed by
Node.js GitHub Bot
parent
f4c200dc76
commit
2ef13b8fb6
@@ -5,13 +5,14 @@ const common = require('../common.js');
|
||||
const bench = common.createBenchmark(main, {
|
||||
encoding: ['utf-8', 'latin1', 'iso-8859-3'],
|
||||
ignoreBOM: [0, 1],
|
||||
fatal: [0, 1],
|
||||
len: [256, 1024 * 16, 1024 * 512],
|
||||
n: [1e2],
|
||||
type: ['SharedArrayBuffer', 'ArrayBuffer', 'Buffer']
|
||||
});
|
||||
|
||||
function main({ encoding, len, n, ignoreBOM, type }) {
|
||||
const decoder = new TextDecoder(encoding, { ignoreBOM });
|
||||
function main({ encoding, len, n, ignoreBOM, type, fatal }) {
|
||||
const decoder = new TextDecoder(encoding, { ignoreBOM, fatal });
|
||||
let buf;
|
||||
|
||||
switch (type) {
|
||||
@@ -31,7 +32,11 @@ function main({ encoding, len, n, ignoreBOM, type }) {
|
||||
|
||||
bench.start();
|
||||
for (let i = 0; i < n; i++) {
|
||||
decoder.decode(buf);
|
||||
try {
|
||||
decoder.decode(buf);
|
||||
} catch {
|
||||
// eslint-disable no-empty
|
||||
}
|
||||
}
|
||||
bench.end(n);
|
||||
}
|
||||
|
||||
@@ -29,6 +29,7 @@ const kFlags = Symbol('flags');
|
||||
const kEncoding = Symbol('encoding');
|
||||
const kDecoder = Symbol('decoder');
|
||||
const kEncoder = Symbol('encoder');
|
||||
const kFatal = Symbol('kFatal');
|
||||
const kUTF8FastPath = Symbol('kUTF8FastPath');
|
||||
const kIgnoreBOM = Symbol('kIgnoreBOM');
|
||||
|
||||
@@ -396,17 +397,16 @@ function makeTextDecoderICU() {
|
||||
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
|
||||
}
|
||||
|
||||
// Only support fast path for UTF-8 without FATAL flag
|
||||
const fastPathAvailable = enc === 'utf-8' && !(options?.fatal);
|
||||
|
||||
this[kDecoder] = true;
|
||||
this[kFlags] = flags;
|
||||
this[kEncoding] = enc;
|
||||
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
|
||||
this[kUTF8FastPath] = fastPathAvailable;
|
||||
this[kFatal] = Boolean(options?.fatal);
|
||||
// Only support fast path for UTF-8.
|
||||
this[kUTF8FastPath] = enc === 'utf-8';
|
||||
this[kHandle] = undefined;
|
||||
|
||||
if (!fastPathAvailable) {
|
||||
if (!this[kUTF8FastPath]) {
|
||||
this.#prepareConverter();
|
||||
}
|
||||
}
|
||||
@@ -425,7 +425,7 @@ function makeTextDecoderICU() {
|
||||
this[kUTF8FastPath] &&= !(options?.stream);
|
||||
|
||||
if (this[kUTF8FastPath]) {
|
||||
return decodeUTF8(input, this[kIgnoreBOM]);
|
||||
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
|
||||
}
|
||||
|
||||
this.#prepareConverter();
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include "node_internals.h"
|
||||
|
||||
#include "env-inl.h"
|
||||
#include "simdutf.h"
|
||||
#include "string_bytes.h"
|
||||
#include "string_search.h"
|
||||
#include "util-inl.h"
|
||||
@@ -583,10 +584,20 @@ void DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
|
||||
ArrayBufferViewContents<char> buffer(args[0]);
|
||||
|
||||
bool ignore_bom = args[1]->IsTrue();
|
||||
bool has_fatal = args[2]->IsTrue();
|
||||
|
||||
const char* data = buffer.data();
|
||||
size_t length = buffer.length();
|
||||
|
||||
if (has_fatal) {
|
||||
auto result = simdutf::validate_utf8_with_errors(data, length);
|
||||
|
||||
if (result.error) {
|
||||
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
|
||||
env->isolate(), "The encoded data was not valid for encoding utf-8");
|
||||
}
|
||||
}
|
||||
|
||||
if (!ignore_bom && length >= 3) {
|
||||
if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
|
||||
data += 3;
|
||||
|
||||
Reference in New Issue
Block a user