LibRegex: Clamp large quantifier values instead of rejecting them

Fixes parsing of regex quantifiers with extremely large numeric values.
Previously, very large quantifier bounds (for example, anything above
2^53-1) were rejected, but Chrome and Firefox both clamp such values to
2^31-1 instead of rejecting them, so we now do the same.
This commit is contained in:
aplefull
2025-12-06 22:01:30 +01:00
committed by Ali Mohammad Pur
parent 2bd6bf69ac
commit 52a3c19c0a
2 changed files with 28 additions and 15 deletions

View File

@@ -21,7 +21,7 @@
namespace regex {
static constexpr size_t s_maximum_repetition_count = 1024 * 1024;
static constexpr u64 s_ecma262_maximum_repetition_count = (1ull << 53) - 1;
static constexpr u64 s_ecma262_maximum_repetition_count = (1ull << 31) - 1;
static constexpr auto s_alphabetic_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"sv;
static constexpr auto s_decimal_characters = "0123456789"sv;
@@ -1333,9 +1333,7 @@ bool ECMA262Parser::parse_interval_quantifier(Optional<u64>& repeat_min, Optiona
auto low_bound_string = read_digits_as_string();
chars_consumed += low_bound_string.length();
auto low_bound = low_bound_string.to_number<u64>();
if (!low_bound.has_value()) {
if (low_bound_string.is_empty()) {
if (!m_should_use_browser_extended_grammar && done())
return set_error(Error::MismatchingBrace);
@@ -1343,16 +1341,27 @@ bool ECMA262Parser::parse_interval_quantifier(Optional<u64>& repeat_min, Optiona
return false;
}
repeat_min = low_bound.value();
auto low_bound = low_bound_string.to_number<u64>();
if (!low_bound.has_value() || low_bound.value() > s_ecma262_maximum_repetition_count) {
repeat_min = s_ecma262_maximum_repetition_count;
} else {
repeat_min = low_bound.value();
}
if (match(TokenType::Comma)) {
consume();
++chars_consumed;
auto high_bound_string = read_digits_as_string();
auto high_bound = high_bound_string.to_number<u64>();
if (high_bound.has_value()) {
repeat_max = high_bound.value();
if (!high_bound_string.is_empty()) {
chars_consumed += high_bound_string.length();
if (!high_bound.has_value() || high_bound.value() > s_ecma262_maximum_repetition_count) {
repeat_max = s_ecma262_maximum_repetition_count;
} else {
repeat_max = high_bound.value();
}
}
} else {
repeat_max = repeat_min;
@@ -1374,9 +1383,6 @@ bool ECMA262Parser::parse_interval_quantifier(Optional<u64>& repeat_min, Optiona
set_error(Error::InvalidBraceContent);
}
if ((*repeat_min > s_ecma262_maximum_repetition_count) || (repeat_max.has_value() && (*repeat_max > s_ecma262_maximum_repetition_count)))
return set_error(Error::InvalidBraceContent);
return true;
}

View File

@@ -581,11 +581,18 @@ TEST_CASE(ECMA262_parse)
{ "a{9007199254740991}"sv }, // 2^53 - 1
{ "a{9007199254740991,}"sv },
{ "a{9007199254740991,9007199254740991}"sv },
{ "a{9007199254740992}"sv, regex::Error::InvalidBraceContent },
{ "a{9007199254740992,}"sv, regex::Error::InvalidBraceContent },
{ "a{9007199254740991,9007199254740992}"sv, regex::Error::InvalidBraceContent },
{ "a{9007199254740992,9007199254740991}"sv, regex::Error::InvalidBraceContent },
{ "a{9007199254740992,9007199254740992}"sv, regex::Error::InvalidBraceContent },
{ "a{9007199254740992}"sv },
{ "a{9007199254740992,}"sv },
{ "a{9007199254740991,9007199254740992}"sv },
{ "a{9007199254740992,9007199254740991}"sv },
{ "a{9007199254740992,9007199254740992}"sv },
{ "a{1,99999999999999999999999999999999999999999999999999}"sv },
{ "a{99999999999999999999999999999999999999999999999999,1}"sv, regex::Error::InvalidBraceContent },
{ "a{99999999999999999999999999999999999999999999999999}"sv },
{ "a{2147483647}"sv }, // 2^31 - 1
{ "a{2147483648}"sv }, // 2^31
{ "a{2147483648,2147483647}"sv },
{ "a{2147483647,2147483646}"sv, regex::Error::InvalidBraceContent },
{ "(?<a>a)(?<a>b)"sv, regex::Error::DuplicateNamedCapture },
{ "(?<a>a)(?<b>b)(?<a>c)"sv, regex::Error::DuplicateNamedCapture },
{ "(?<a>(?<a>a))"sv, regex::Error::DuplicateNamedCapture },