src: implement FastByteLengthUtf8 with simdutf::utf8_length_from_latin1

PR-URL: https://github.com/nodejs/node/pull/50840
Reviewed-By: Yagiz Nizipli <yagiz.nizipli@sentry.io>
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Vinícius Lourenço Claro Cardoso <contact@viniciusl.com.br>
This commit is contained in:
Daniel Lemire
2023-12-19 12:11:19 -05:00
committed by GitHub
parent 14e3444a66
commit 891bd5b7d5
2 changed files with 10 additions and 3 deletions

View File

@@ -2,7 +2,8 @@
const common = require('../common');
const bench = common.createBenchmark(main, {
type: ['one_byte', 'two_bytes', 'three_bytes', 'four_bytes'],
type: ['one_byte', 'two_bytes', 'three_bytes',
'four_bytes', 'latin1'],
encoding: ['utf8', 'base64'],
repeat: [1, 2, 16, 256], // x16
n: [4e6],
@@ -14,6 +15,8 @@ const chars = {
two_bytes: 'ΰαβγδεζηθικλμνξο',
three_bytes: '挰挱挲挳挴挵挶挷挸挹挺挻挼挽挾挿',
four_bytes: '𠜎𠜱𠝹𠱓𠱸𠲖𠳏𠳕𠴕𠵼𠵿𠸎𠸏𠹷𠺝𠺢',
latin1: 'Un homme sage est supérieur à toutes ' +
'les insultes qui peuvent lui être adressées, et la meilleure réponse est la patience et la modération.',
};
function getInput(type, repeat, encoding) {

View File

@@ -743,13 +743,17 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {
// Fast-path computation of the UTF-8 byte length of a one-byte string.
//
// Each input byte below 0x80 contributes one output byte; each byte with the
// high bit set contributes two. The result is therefore `source.length` plus
// the number of input bytes whose top bit is set.
//
// `receiver` is unused here.
// Returns the number of bytes as a uint32_t.
uint32_t FastByteLengthUtf8(Local<Value> receiver,
                            const v8::FastOneByteString& source) {
  // For short inputs, the function call overhead to simdutf may not be
  // worth it; reserve simdutf for long strings.
  if (source.length > 128) {
    return simdutf::utf8_length_from_latin1(source.data, source.length);
  }

  uint32_t length = source.length;
  // Start from one output byte per input byte; the loop below adds the
  // extra byte for every character >= 0x80. `length` must be counted
  // exactly once, so there is no additional `result += length` afterwards.
  uint32_t result = length;
  const uint8_t* data = reinterpret_cast<const uint8_t*>(source.data);
  for (uint32_t i = 0; i < length; ++i) {
    // (data[i] >> 7) is 1 when the high bit is set, 0 otherwise.
    result += (data[i] >> 7);
  }
  return result;
}