module: eliminate performance cost of detection for cjs entry

PR-URL: https://github.com/nodejs/node/pull/52093
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Yagiz Nizipli <yagiz.nizipli@sentry.io>
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
Reviewed-By: Jacob Smith <jacob@frende.me>
Reviewed-By: Richard Lau <rlau@redhat.com>
This commit is contained in:
Geoffrey Booth
2024-03-20 08:48:05 -07:00
committed by GitHub
parent f1949ac1ae
commit 8bc745944e
6 changed files with 196 additions and 101 deletions

View File

@@ -9,6 +9,7 @@ const bench = common.createBenchmark(main, {
script: [
'benchmark/fixtures/require-builtins',
'test/fixtures/semicolon',
'test/fixtures/snapshot/typescript',
],
mode: ['process', 'worker'],
count: [30],

View File

@@ -75,6 +75,7 @@ module.exports = {
cjsExportsCache,
cjsSourceCache,
initializeCJS,
entryPointSource: undefined, // Set below.
Module,
wrapSafe,
};
@@ -1337,8 +1338,15 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache) {
return result;
} catch (err) {
if (process.mainModule === cjsModuleInstance) {
const { enrichCJSError } = require('internal/modules/esm/translators');
enrichCJSError(err, content, filename);
if (getOptionValue('--experimental-detect-module')) {
// For the main entry point, cache the source to potentially retry as ESM.
module.exports.entryPointSource = content;
} else {
// We only enrich the error (print a warning) if we're sure we're going to for-sure throw it; so if we're
// retrying as ESM, wait until we know whether we're going to retry before calling `enrichCJSError`.
const { enrichCJSError } = require('internal/modules/esm/translators');
enrichCJSError(err, content, filename);
}
}
throw err;
}

View File

@@ -19,6 +19,15 @@ const {
} = require('internal/errors').codes;
const { BuiltinModule } = require('internal/bootstrap/realm');
const {
shouldRetryAsESM: contextifyShouldRetryAsESM,
constants: {
syntaxDetectionErrors: {
esmSyntaxErrorMessages,
throwsOnlyInCommonJSErrorMessages,
},
},
} = internalBinding('contextify');
const { validateString } = require('internal/validators');
const fs = require('fs'); // Import all of `fs` so that it can be monkey-patched.
const internalFS = require('internal/fs/utils');
@@ -320,6 +329,31 @@ function normalizeReferrerURL(referrerName) {
}
let esmSyntaxErrorMessagesSet; // Declared lazily in shouldRetryAsESM
let throwsOnlyInCommonJSErrorMessagesSet; // Declared lazily in shouldRetryAsESM
/**
* After an attempt to parse a module as CommonJS throws an error, should we try again as ESM?
* We only want to try again as ESM if the error is due to syntax that is only valid in ESM; and if the CommonJS parse
* throws on an error that would not have been a syntax error in ESM (like via top-level `await` or a lexical
* redeclaration of one of the CommonJS variables) then we need to parse again to see if it would have thrown in ESM.
* @param {string} errorMessage The string message thrown by V8 when attempting to parse as CommonJS
* @param {string} source Module contents
*/
function shouldRetryAsESM(errorMessage, source) {
esmSyntaxErrorMessagesSet ??= new SafeSet(esmSyntaxErrorMessages);
if (esmSyntaxErrorMessagesSet.has(errorMessage)) {
return true;
}
throwsOnlyInCommonJSErrorMessagesSet ??= new SafeSet(throwsOnlyInCommonJSErrorMessages);
if (throwsOnlyInCommonJSErrorMessagesSet.has(errorMessage)) {
return /** @type {boolean} */(contextifyShouldRetryAsESM(source));
}
return false;
}
// Whether we have started executing any user-provided CJS code.
// This is set right before we call the wrapped CJS code (not after,
// in case we are half-way in the execution when internals check this).
@@ -339,6 +373,7 @@ module.exports = {
loadBuiltinModule,
makeRequireFunction,
normalizeReferrerURL,
shouldRetryAsESM,
stripBOM,
toRealPath,
hasStartedUserCJSExecution() {

View File

@@ -5,7 +5,6 @@ const {
globalThis,
} = primordials;
const { containsModuleSyntax } = internalBinding('contextify');
const { getNearestParentPackageJSONType } = internalBinding('modules');
const { getOptionValue } = require('internal/options');
const { checkPackageJSONIntegrity } = require('internal/modules/package_json_reader');
@@ -87,10 +86,6 @@ function shouldUseESMLoader(mainPath) {
// No package.json or no `type` field.
if (response === undefined || response[0] === 'none') {
if (getOptionValue('--experimental-detect-module')) {
// If the first argument of `containsModuleSyntax` is undefined, it will read `mainPath` from the file system.
return containsModuleSyntax(undefined, mainPath);
}
return false;
}
@@ -157,12 +152,43 @@ function runEntryPointWithESMLoader(callback) {
* by `require('module')`) even when the entry point is ESM.
* This monkey-patchable code is bypassed under `--experimental-default-type=module`.
* Because of backwards compatibility, this function is exposed publicly via `import { runMain } from 'node:module'`.
* When `--experimental-detect-module` is passed, this function will attempt to run ambiguous (no explicit extension, no
* `package.json` type field) entry points as CommonJS first; under certain conditions, it will retry running as ESM.
* @param {string} main - First positional CLI argument, such as `'entry.js'` from `node entry.js`
*/
function executeUserEntryPoint(main = process.argv[1]) {
const resolvedMain = resolveMainPath(main);
const useESMLoader = shouldUseESMLoader(resolvedMain);
if (useESMLoader) {
// Unless we know we should use the ESM loader to handle the entry point per the checks in `shouldUseESMLoader`, first
// try to run the entry point via the CommonJS loader; and if that fails under certain conditions, retry as ESM.
let retryAsESM = false;
if (!useESMLoader) {
const cjsLoader = require('internal/modules/cjs/loader');
const { Module } = cjsLoader;
if (getOptionValue('--experimental-detect-module')) {
try {
// Module._load is the monkey-patchable CJS module loader.
Module._load(main, null, true);
} catch (error) {
const source = cjsLoader.entryPointSource;
const { shouldRetryAsESM } = require('internal/modules/helpers');
retryAsESM = shouldRetryAsESM(error.message, source);
// In case the entry point is a large file, such as a bundle,
// ensure no further references can prevent it being garbage-collected.
cjsLoader.entryPointSource = undefined;
if (!retryAsESM) {
const { enrichCJSError } = require('internal/modules/esm/translators');
enrichCJSError(error, source, resolvedMain);
throw error;
}
}
} else { // `--experimental-detect-module` is not passed
Module._load(main, null, true);
}
}
if (useESMLoader || retryAsESM) {
const mainPath = resolvedMain || main;
const mainURL = pathToFileURL(mainPath).href;
@@ -171,10 +197,6 @@ function executeUserEntryPoint(main = process.argv[1]) {
// even after the event loop stops running.
return cascadedLoader.import(mainURL, undefined, { __proto__: null }, true);
});
} else {
// Module._load is the monkey-patchable CJS module loader.
const { Module } = require('internal/modules/cjs/loader');
Module._load(main, null, true);
}
}

View File

@@ -345,6 +345,7 @@ void ContextifyContext::CreatePerIsolateProperties(
SetMethod(isolate, target, "makeContext", MakeContext);
SetMethod(isolate, target, "compileFunction", CompileFunction);
SetMethod(isolate, target, "containsModuleSyntax", ContainsModuleSyntax);
SetMethod(isolate, target, "shouldRetryAsESM", ShouldRetryAsESM);
}
void ContextifyContext::RegisterExternalReferences(
@@ -352,6 +353,7 @@ void ContextifyContext::RegisterExternalReferences(
registry->Register(MakeContext);
registry->Register(CompileFunction);
registry->Register(ContainsModuleSyntax);
registry->Register(ShouldRetryAsESM);
registry->Register(PropertyGetterCallback);
registry->Register(PropertySetterCallback);
registry->Register(PropertyDescriptorCallback);
@@ -1404,7 +1406,7 @@ Local<Object> ContextifyContext::CompileFunctionAndCacheResult(
// While top-level `await` is not permitted in CommonJS, it returns the same
// error message as when `await` is used in a sync function, so we don't use it
// as a disambiguation.
constexpr std::array<std::string_view, 3> esm_syntax_error_messages = {
static std::vector<std::string_view> esm_syntax_error_messages = {
"Cannot use import statement outside a module", // `import` statements
"Unexpected token 'export'", // `export` statements
"Cannot use 'import.meta' outside a module"}; // `import.meta` references
@@ -1419,7 +1421,7 @@ constexpr std::array<std::string_view, 3> esm_syntax_error_messages = {
// - Top-level `await`: if the user writes `await` at the top level of a
// CommonJS module, it will throw a syntax error; but the same code is valid
// in ESM.
constexpr std::array<std::string_view, 6> throws_only_in_cjs_error_messages = {
static std::vector<std::string_view> throws_only_in_cjs_error_messages = {
"Identifier 'module' has already been declared",
"Identifier 'exports' has already been declared",
"Identifier 'require' has already been declared",
@@ -1439,6 +1441,10 @@ void ContextifyContext::ContainsModuleSyntax(
env, "containsModuleSyntax needs at least 1 argument");
}
// Argument 1: source code
CHECK(args[0]->IsString());
auto code = args[0].As<String>();
// Argument 2: filename; if undefined, use empty string
Local<String> filename = String::Empty(isolate);
if (!args[1]->IsUndefined()) {
@@ -1446,28 +1452,6 @@ void ContextifyContext::ContainsModuleSyntax(
filename = args[1].As<String>();
}
// Argument 1: source code; if undefined, read from filename in argument 2
Local<String> code;
if (args[0]->IsUndefined()) {
CHECK(!filename.IsEmpty());
const char* filename_str = Utf8Value(isolate, filename).out();
std::string contents;
int result = ReadFileSync(&contents, filename_str);
if (result != 0) {
isolate->ThrowException(
ERR_MODULE_NOT_FOUND(isolate, "Cannot read file %s", filename_str));
return;
}
code = String::NewFromUtf8(isolate,
contents.c_str(),
v8::NewStringType::kNormal,
contents.length())
.ToLocalChecked();
} else {
CHECK(args[0]->IsString());
code = args[0].As<String>();
}
// TODO(geoffreybooth): Centralize this rather than matching the logic in
// cjs/loader.js and translators.js
Local<String> script_id = String::Concat(
@@ -1497,75 +1481,97 @@ void ContextifyContext::ContainsModuleSyntax(
bool should_retry_as_esm = false;
if (try_catch.HasCaught() && !try_catch.HasTerminated()) {
Utf8Value message_value(env->isolate(), try_catch.Message()->Get());
auto message = message_value.ToStringView();
for (const auto& error_message : esm_syntax_error_messages) {
if (message.find(error_message) != std::string_view::npos) {
should_retry_as_esm = true;
break;
}
}
if (!should_retry_as_esm) {
for (const auto& error_message : throws_only_in_cjs_error_messages) {
if (message.find(error_message) != std::string_view::npos) {
// Try parsing again where the CommonJS wrapper is replaced by an
// async function wrapper. If the new parse succeeds, then the error
// was caused by either a top-level declaration of one of the CommonJS
// module variables, or a top-level `await`.
TryCatchScope second_parse_try_catch(env);
code =
String::Concat(isolate,
String::NewFromUtf8(isolate, "(async function() {")
.ToLocalChecked(),
code);
code = String::Concat(
isolate,
code,
String::NewFromUtf8(isolate, "})();").ToLocalChecked());
ScriptCompiler::Source wrapped_source = GetCommonJSSourceInstance(
isolate, code, filename, 0, 0, host_defined_options, nullptr);
std::ignore = ScriptCompiler::CompileFunction(
context,
&wrapped_source,
params.size(),
params.data(),
0,
nullptr,
options,
v8::ScriptCompiler::NoCacheReason::kNoCacheNoReason);
if (!second_parse_try_catch.HasTerminated()) {
if (second_parse_try_catch.HasCaught()) {
// If on the second parse an error is thrown by ESM syntax, then
// what happened was that the user had top-level `await` or a
// top-level declaration of one of the CommonJS module variables
// above the first `import` or `export`.
Utf8Value second_message_value(
env->isolate(), second_parse_try_catch.Message()->Get());
auto second_message = second_message_value.ToStringView();
for (const auto& error_message : esm_syntax_error_messages) {
if (second_message.find(error_message) !=
std::string_view::npos) {
should_retry_as_esm = true;
break;
}
}
} else {
// No errors thrown in the second parse, so most likely the error
// was caused by a top-level `await` or a top-level declaration of
// one of the CommonJS module variables.
should_retry_as_esm = true;
}
}
break;
}
}
}
should_retry_as_esm =
ContextifyContext::ShouldRetryAsESMInternal(env, code);
}
args.GetReturnValue().Set(should_retry_as_esm);
}
void ContextifyContext::ShouldRetryAsESM(
const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
CHECK_EQ(args.Length(), 1); // code
// Argument 1: source code
Local<String> code;
CHECK(args[0]->IsString());
code = args[0].As<String>();
bool should_retry_as_esm =
ContextifyContext::ShouldRetryAsESMInternal(env, code);
args.GetReturnValue().Set(should_retry_as_esm);
}
bool ContextifyContext::ShouldRetryAsESMInternal(Environment* env,
Local<String> code) {
Isolate* isolate = env->isolate();
Local<String> script_id =
FIXED_ONE_BYTE_STRING(isolate, "[retry_as_esm_check]");
Local<Symbol> id_symbol = Symbol::New(isolate, script_id);
Local<PrimitiveArray> host_defined_options =
GetHostDefinedOptions(isolate, id_symbol);
ScriptCompiler::Source source =
GetCommonJSSourceInstance(isolate,
code,
script_id, // filename
0, // line offset
0, // column offset
host_defined_options,
nullptr); // cached_data
TryCatchScope try_catch(env);
ShouldNotAbortOnUncaughtScope no_abort_scope(env);
// Try parsing where instead of the CommonJS wrapper we use an async function
// wrapper. If the parse succeeds, then any CommonJS parse error for this
// module was caused by either a top-level declaration of one of the CommonJS
// module variables, or a top-level `await`.
code = String::Concat(
isolate, FIXED_ONE_BYTE_STRING(isolate, "(async function() {"), code);
code = String::Concat(isolate, code, FIXED_ONE_BYTE_STRING(isolate, "})();"));
ScriptCompiler::Source wrapped_source = GetCommonJSSourceInstance(
isolate, code, script_id, 0, 0, host_defined_options, nullptr);
Local<Context> context = env->context();
std::vector<Local<String>> params = GetCJSParameters(env->isolate_data());
USE(ScriptCompiler::CompileFunction(
context,
&wrapped_source,
params.size(),
params.data(),
0,
nullptr,
ScriptCompiler::kNoCompileOptions,
v8::ScriptCompiler::NoCacheReason::kNoCacheNoReason));
if (!try_catch.HasTerminated()) {
if (try_catch.HasCaught()) {
// If on the second parse an error is thrown by ESM syntax, then
// what happened was that the user had top-level `await` or a
// top-level declaration of one of the CommonJS module variables
// above the first `import` or `export`.
Utf8Value message_value(env->isolate(), try_catch.Message()->Get());
auto message_view = message_value.ToStringView();
for (const auto& error_message : esm_syntax_error_messages) {
if (message_view.find(error_message) != std::string_view::npos) {
return true;
}
}
} else {
// No errors thrown in the second parse, so most likely the error
// was caused by a top-level `await` or a top-level declaration of
// one of the CommonJS module variables.
return true;
}
}
return false;
}
static void CompileFunctionForCJSLoader(
const FunctionCallbackInfo<Value>& args) {
CHECK(args[0]->IsString());
@@ -1724,6 +1730,7 @@ static void CreatePerContextProperties(Local<Object> target,
Local<Object> constants = Object::New(env->isolate());
Local<Object> measure_memory = Object::New(env->isolate());
Local<Object> memory_execution = Object::New(env->isolate());
Local<Object> syntax_detection_errors = Object::New(env->isolate());
{
Local<Object> memory_mode = Object::New(env->isolate());
@@ -1744,6 +1751,25 @@ static void CreatePerContextProperties(Local<Object> target,
READONLY_PROPERTY(constants, "measureMemory", measure_memory);
{
Local<Value> esm_syntax_error_messages_array =
ToV8Value(context, esm_syntax_error_messages).ToLocalChecked();
READONLY_PROPERTY(syntax_detection_errors,
"esmSyntaxErrorMessages",
esm_syntax_error_messages_array);
}
{
Local<Value> throws_only_in_cjs_error_messages_array =
ToV8Value(context, throws_only_in_cjs_error_messages).ToLocalChecked();
READONLY_PROPERTY(syntax_detection_errors,
"throwsOnlyInCommonJSErrorMessages",
throws_only_in_cjs_error_messages_array);
}
READONLY_PROPERTY(
constants, "syntaxDetectionErrors", syntax_detection_errors);
target->Set(context, env->constants_string(), constants).Check();
}

View File

@@ -106,6 +106,9 @@ class ContextifyContext : public BaseObject {
const v8::ScriptCompiler::Source& source);
static void ContainsModuleSyntax(
const v8::FunctionCallbackInfo<v8::Value>& args);
static void ShouldRetryAsESM(const v8::FunctionCallbackInfo<v8::Value>& args);
static bool ShouldRetryAsESMInternal(Environment* env,
v8::Local<v8::String> code);
static void WeakCallback(
const v8::WeakCallbackInfo<ContextifyContext>& data);
static void PropertyGetterCallback(