src: fix endianness of simdutf

PR-URL: https://github.com/nodejs/node/pull/46257
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
Reviewed-By: Stephen Belanger <admin@stephenbelanger.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
This commit is contained in:
Yagiz Nizipli
2023-01-18 15:59:20 -05:00
committed by Node.js GitHub Bot
parent 7b2f1a1809
commit 95c064d46e
2 changed files with 4 additions and 18 deletions

View File

@@ -248,10 +248,10 @@ bool BuiltinLoader::Add(const char* id, std::string_view utf8source) {
size_t expected_u16_length =
simdutf::utf16_length_from_utf8(utf8source.data(), utf8source.length());
auto out = std::make_shared<std::vector<uint16_t>>(expected_u16_length);
size_t u16_length = simdutf::convert_utf8_to_utf16le(
utf8source.data(),
utf8source.length(),
reinterpret_cast<char16_t*>(out->data()));
size_t u16_length =
simdutf::convert_utf8_to_utf16(utf8source.data(),
utf8source.length(),
reinterpret_cast<char16_t*>(out->data()));
out->resize(u16_length);
return Add(id, UnionBytes(out));
}

View File

@@ -299,17 +299,3 @@ TEST(UtilTest, SPrintF) {
const std::string with_zero = std::string("a") + '\0' + 'b';
EXPECT_EQ(SPrintF("%s", with_zero), with_zero);
}
TEST(UtilTest, SimdutfEndiannessDoesNotMeanEndianness) {
// In simdutf, "LE" does *not* refer to Little Endian, it refers
// to 16-byte code units that are stored using *host* endianness.
// This is weird and confusing naming, and so we add this assertion
// here to verify that this is actually the case (so that CI tells
// us if it changed, because for most people Little Endian is
// host endianness, so locally everything would work fine).
const char utf8source[] = "\xe7\x8c\xab";
char16_t u16output;
size_t u16len = simdutf::convert_utf8_to_utf16le(utf8source, 3, &u16output);
EXPECT_EQ(u16len, 1u);
EXPECT_EQ(u16output, 0x732B);
}