mirror of
https://github.com/zebrajr/node.git
synced 2026-01-15 12:15:26 +00:00
module: eliminate performance cost of detection for cjs entry
PR-URL: https://github.com/nodejs/node/pull/52093 Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: Yagiz Nizipli <yagiz.nizipli@sentry.io> Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com> Reviewed-By: Jacob Smith <jacob@frende.me> Reviewed-By: Richard Lau <rlau@redhat.com>
This commit is contained in:
@@ -9,6 +9,7 @@ const bench = common.createBenchmark(main, {
|
||||
script: [
|
||||
'benchmark/fixtures/require-builtins',
|
||||
'test/fixtures/semicolon',
|
||||
'test/fixtures/snapshot/typescript',
|
||||
],
|
||||
mode: ['process', 'worker'],
|
||||
count: [30],
|
||||
|
||||
@@ -75,6 +75,7 @@ module.exports = {
|
||||
cjsExportsCache,
|
||||
cjsSourceCache,
|
||||
initializeCJS,
|
||||
entryPointSource: undefined, // Set below.
|
||||
Module,
|
||||
wrapSafe,
|
||||
};
|
||||
@@ -1337,8 +1338,15 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache) {
|
||||
return result;
|
||||
} catch (err) {
|
||||
if (process.mainModule === cjsModuleInstance) {
|
||||
const { enrichCJSError } = require('internal/modules/esm/translators');
|
||||
enrichCJSError(err, content, filename);
|
||||
if (getOptionValue('--experimental-detect-module')) {
|
||||
// For the main entry point, cache the source to potentially retry as ESM.
|
||||
module.exports.entryPointSource = content;
|
||||
} else {
|
||||
// We only enrich the error (print a warning) if we're sure we're going to for-sure throw it; so if we're
|
||||
// retrying as ESM, wait until we know whether we're going to retry before calling `enrichCJSError`.
|
||||
const { enrichCJSError } = require('internal/modules/esm/translators');
|
||||
enrichCJSError(err, content, filename);
|
||||
}
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
|
||||
@@ -19,6 +19,15 @@ const {
|
||||
} = require('internal/errors').codes;
|
||||
const { BuiltinModule } = require('internal/bootstrap/realm');
|
||||
|
||||
const {
|
||||
shouldRetryAsESM: contextifyShouldRetryAsESM,
|
||||
constants: {
|
||||
syntaxDetectionErrors: {
|
||||
esmSyntaxErrorMessages,
|
||||
throwsOnlyInCommonJSErrorMessages,
|
||||
},
|
||||
},
|
||||
} = internalBinding('contextify');
|
||||
const { validateString } = require('internal/validators');
|
||||
const fs = require('fs'); // Import all of `fs` so that it can be monkey-patched.
|
||||
const internalFS = require('internal/fs/utils');
|
||||
@@ -320,6 +329,31 @@ function normalizeReferrerURL(referrerName) {
|
||||
}
|
||||
|
||||
|
||||
let esmSyntaxErrorMessagesSet; // Declared lazily in shouldRetryAsESM
|
||||
let throwsOnlyInCommonJSErrorMessagesSet; // Declared lazily in shouldRetryAsESM
|
||||
/**
|
||||
* After an attempt to parse a module as CommonJS throws an error, should we try again as ESM?
|
||||
* We only want to try again as ESM if the error is due to syntax that is only valid in ESM; and if the CommonJS parse
|
||||
* throws on an error that would not have been a syntax error in ESM (like via top-level `await` or a lexical
|
||||
* redeclaration of one of the CommonJS variables) then we need to parse again to see if it would have thrown in ESM.
|
||||
* @param {string} errorMessage The string message thrown by V8 when attempting to parse as CommonJS
|
||||
* @param {string} source Module contents
|
||||
*/
|
||||
function shouldRetryAsESM(errorMessage, source) {
|
||||
esmSyntaxErrorMessagesSet ??= new SafeSet(esmSyntaxErrorMessages);
|
||||
if (esmSyntaxErrorMessagesSet.has(errorMessage)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
throwsOnlyInCommonJSErrorMessagesSet ??= new SafeSet(throwsOnlyInCommonJSErrorMessages);
|
||||
if (throwsOnlyInCommonJSErrorMessagesSet.has(errorMessage)) {
|
||||
return /** @type {boolean} */(contextifyShouldRetryAsESM(source));
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Whether we have started executing any user-provided CJS code.
|
||||
// This is set right before we call the wrapped CJS code (not after,
|
||||
// in case we are half-way in the execution when internals check this).
|
||||
@@ -339,6 +373,7 @@ module.exports = {
|
||||
loadBuiltinModule,
|
||||
makeRequireFunction,
|
||||
normalizeReferrerURL,
|
||||
shouldRetryAsESM,
|
||||
stripBOM,
|
||||
toRealPath,
|
||||
hasStartedUserCJSExecution() {
|
||||
|
||||
@@ -5,7 +5,6 @@ const {
|
||||
globalThis,
|
||||
} = primordials;
|
||||
|
||||
const { containsModuleSyntax } = internalBinding('contextify');
|
||||
const { getNearestParentPackageJSONType } = internalBinding('modules');
|
||||
const { getOptionValue } = require('internal/options');
|
||||
const { checkPackageJSONIntegrity } = require('internal/modules/package_json_reader');
|
||||
@@ -87,10 +86,6 @@ function shouldUseESMLoader(mainPath) {
|
||||
|
||||
// No package.json or no `type` field.
|
||||
if (response === undefined || response[0] === 'none') {
|
||||
if (getOptionValue('--experimental-detect-module')) {
|
||||
// If the first argument of `containsModuleSyntax` is undefined, it will read `mainPath` from the file system.
|
||||
return containsModuleSyntax(undefined, mainPath);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -157,12 +152,43 @@ function runEntryPointWithESMLoader(callback) {
|
||||
* by `require('module')`) even when the entry point is ESM.
|
||||
* This monkey-patchable code is bypassed under `--experimental-default-type=module`.
|
||||
* Because of backwards compatibility, this function is exposed publicly via `import { runMain } from 'node:module'`.
|
||||
* When `--experimental-detect-module` is passed, this function will attempt to run ambiguous (no explicit extension, no
|
||||
* `package.json` type field) entry points as CommonJS first; under certain conditions, it will retry running as ESM.
|
||||
* @param {string} main - First positional CLI argument, such as `'entry.js'` from `node entry.js`
|
||||
*/
|
||||
function executeUserEntryPoint(main = process.argv[1]) {
|
||||
const resolvedMain = resolveMainPath(main);
|
||||
const useESMLoader = shouldUseESMLoader(resolvedMain);
|
||||
if (useESMLoader) {
|
||||
|
||||
// Unless we know we should use the ESM loader to handle the entry point per the checks in `shouldUseESMLoader`, first
|
||||
// try to run the entry point via the CommonJS loader; and if that fails under certain conditions, retry as ESM.
|
||||
let retryAsESM = false;
|
||||
if (!useESMLoader) {
|
||||
const cjsLoader = require('internal/modules/cjs/loader');
|
||||
const { Module } = cjsLoader;
|
||||
if (getOptionValue('--experimental-detect-module')) {
|
||||
try {
|
||||
// Module._load is the monkey-patchable CJS module loader.
|
||||
Module._load(main, null, true);
|
||||
} catch (error) {
|
||||
const source = cjsLoader.entryPointSource;
|
||||
const { shouldRetryAsESM } = require('internal/modules/helpers');
|
||||
retryAsESM = shouldRetryAsESM(error.message, source);
|
||||
// In case the entry point is a large file, such as a bundle,
|
||||
// ensure no further references can prevent it being garbage-collected.
|
||||
cjsLoader.entryPointSource = undefined;
|
||||
if (!retryAsESM) {
|
||||
const { enrichCJSError } = require('internal/modules/esm/translators');
|
||||
enrichCJSError(error, source, resolvedMain);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
} else { // `--experimental-detect-module` is not passed
|
||||
Module._load(main, null, true);
|
||||
}
|
||||
}
|
||||
|
||||
if (useESMLoader || retryAsESM) {
|
||||
const mainPath = resolvedMain || main;
|
||||
const mainURL = pathToFileURL(mainPath).href;
|
||||
|
||||
@@ -171,10 +197,6 @@ function executeUserEntryPoint(main = process.argv[1]) {
|
||||
// even after the event loop stops running.
|
||||
return cascadedLoader.import(mainURL, undefined, { __proto__: null }, true);
|
||||
});
|
||||
} else {
|
||||
// Module._load is the monkey-patchable CJS module loader.
|
||||
const { Module } = require('internal/modules/cjs/loader');
|
||||
Module._load(main, null, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -345,6 +345,7 @@ void ContextifyContext::CreatePerIsolateProperties(
|
||||
SetMethod(isolate, target, "makeContext", MakeContext);
|
||||
SetMethod(isolate, target, "compileFunction", CompileFunction);
|
||||
SetMethod(isolate, target, "containsModuleSyntax", ContainsModuleSyntax);
|
||||
SetMethod(isolate, target, "shouldRetryAsESM", ShouldRetryAsESM);
|
||||
}
|
||||
|
||||
void ContextifyContext::RegisterExternalReferences(
|
||||
@@ -352,6 +353,7 @@ void ContextifyContext::RegisterExternalReferences(
|
||||
registry->Register(MakeContext);
|
||||
registry->Register(CompileFunction);
|
||||
registry->Register(ContainsModuleSyntax);
|
||||
registry->Register(ShouldRetryAsESM);
|
||||
registry->Register(PropertyGetterCallback);
|
||||
registry->Register(PropertySetterCallback);
|
||||
registry->Register(PropertyDescriptorCallback);
|
||||
@@ -1404,7 +1406,7 @@ Local<Object> ContextifyContext::CompileFunctionAndCacheResult(
|
||||
// While top-level `await` is not permitted in CommonJS, it returns the same
|
||||
// error message as when `await` is used in a sync function, so we don't use it
|
||||
// as a disambiguation.
|
||||
constexpr std::array<std::string_view, 3> esm_syntax_error_messages = {
|
||||
static std::vector<std::string_view> esm_syntax_error_messages = {
|
||||
"Cannot use import statement outside a module", // `import` statements
|
||||
"Unexpected token 'export'", // `export` statements
|
||||
"Cannot use 'import.meta' outside a module"}; // `import.meta` references
|
||||
@@ -1419,7 +1421,7 @@ constexpr std::array<std::string_view, 3> esm_syntax_error_messages = {
|
||||
// - Top-level `await`: if the user writes `await` at the top level of a
|
||||
// CommonJS module, it will throw a syntax error; but the same code is valid
|
||||
// in ESM.
|
||||
constexpr std::array<std::string_view, 6> throws_only_in_cjs_error_messages = {
|
||||
static std::vector<std::string_view> throws_only_in_cjs_error_messages = {
|
||||
"Identifier 'module' has already been declared",
|
||||
"Identifier 'exports' has already been declared",
|
||||
"Identifier 'require' has already been declared",
|
||||
@@ -1439,6 +1441,10 @@ void ContextifyContext::ContainsModuleSyntax(
|
||||
env, "containsModuleSyntax needs at least 1 argument");
|
||||
}
|
||||
|
||||
// Argument 1: source code
|
||||
CHECK(args[0]->IsString());
|
||||
auto code = args[0].As<String>();
|
||||
|
||||
// Argument 2: filename; if undefined, use empty string
|
||||
Local<String> filename = String::Empty(isolate);
|
||||
if (!args[1]->IsUndefined()) {
|
||||
@@ -1446,28 +1452,6 @@ void ContextifyContext::ContainsModuleSyntax(
|
||||
filename = args[1].As<String>();
|
||||
}
|
||||
|
||||
// Argument 1: source code; if undefined, read from filename in argument 2
|
||||
Local<String> code;
|
||||
if (args[0]->IsUndefined()) {
|
||||
CHECK(!filename.IsEmpty());
|
||||
const char* filename_str = Utf8Value(isolate, filename).out();
|
||||
std::string contents;
|
||||
int result = ReadFileSync(&contents, filename_str);
|
||||
if (result != 0) {
|
||||
isolate->ThrowException(
|
||||
ERR_MODULE_NOT_FOUND(isolate, "Cannot read file %s", filename_str));
|
||||
return;
|
||||
}
|
||||
code = String::NewFromUtf8(isolate,
|
||||
contents.c_str(),
|
||||
v8::NewStringType::kNormal,
|
||||
contents.length())
|
||||
.ToLocalChecked();
|
||||
} else {
|
||||
CHECK(args[0]->IsString());
|
||||
code = args[0].As<String>();
|
||||
}
|
||||
|
||||
// TODO(geoffreybooth): Centralize this rather than matching the logic in
|
||||
// cjs/loader.js and translators.js
|
||||
Local<String> script_id = String::Concat(
|
||||
@@ -1497,75 +1481,97 @@ void ContextifyContext::ContainsModuleSyntax(
|
||||
|
||||
bool should_retry_as_esm = false;
|
||||
if (try_catch.HasCaught() && !try_catch.HasTerminated()) {
|
||||
Utf8Value message_value(env->isolate(), try_catch.Message()->Get());
|
||||
auto message = message_value.ToStringView();
|
||||
|
||||
for (const auto& error_message : esm_syntax_error_messages) {
|
||||
if (message.find(error_message) != std::string_view::npos) {
|
||||
should_retry_as_esm = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!should_retry_as_esm) {
|
||||
for (const auto& error_message : throws_only_in_cjs_error_messages) {
|
||||
if (message.find(error_message) != std::string_view::npos) {
|
||||
// Try parsing again where the CommonJS wrapper is replaced by an
|
||||
// async function wrapper. If the new parse succeeds, then the error
|
||||
// was caused by either a top-level declaration of one of the CommonJS
|
||||
// module variables, or a top-level `await`.
|
||||
TryCatchScope second_parse_try_catch(env);
|
||||
code =
|
||||
String::Concat(isolate,
|
||||
String::NewFromUtf8(isolate, "(async function() {")
|
||||
.ToLocalChecked(),
|
||||
code);
|
||||
code = String::Concat(
|
||||
isolate,
|
||||
code,
|
||||
String::NewFromUtf8(isolate, "})();").ToLocalChecked());
|
||||
ScriptCompiler::Source wrapped_source = GetCommonJSSourceInstance(
|
||||
isolate, code, filename, 0, 0, host_defined_options, nullptr);
|
||||
std::ignore = ScriptCompiler::CompileFunction(
|
||||
context,
|
||||
&wrapped_source,
|
||||
params.size(),
|
||||
params.data(),
|
||||
0,
|
||||
nullptr,
|
||||
options,
|
||||
v8::ScriptCompiler::NoCacheReason::kNoCacheNoReason);
|
||||
if (!second_parse_try_catch.HasTerminated()) {
|
||||
if (second_parse_try_catch.HasCaught()) {
|
||||
// If on the second parse an error is thrown by ESM syntax, then
|
||||
// what happened was that the user had top-level `await` or a
|
||||
// top-level declaration of one of the CommonJS module variables
|
||||
// above the first `import` or `export`.
|
||||
Utf8Value second_message_value(
|
||||
env->isolate(), second_parse_try_catch.Message()->Get());
|
||||
auto second_message = second_message_value.ToStringView();
|
||||
for (const auto& error_message : esm_syntax_error_messages) {
|
||||
if (second_message.find(error_message) !=
|
||||
std::string_view::npos) {
|
||||
should_retry_as_esm = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// No errors thrown in the second parse, so most likely the error
|
||||
// was caused by a top-level `await` or a top-level declaration of
|
||||
// one of the CommonJS module variables.
|
||||
should_retry_as_esm = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
should_retry_as_esm =
|
||||
ContextifyContext::ShouldRetryAsESMInternal(env, code);
|
||||
}
|
||||
args.GetReturnValue().Set(should_retry_as_esm);
|
||||
}
|
||||
|
||||
void ContextifyContext::ShouldRetryAsESM(
|
||||
const FunctionCallbackInfo<Value>& args) {
|
||||
Environment* env = Environment::GetCurrent(args);
|
||||
|
||||
CHECK_EQ(args.Length(), 1); // code
|
||||
|
||||
// Argument 1: source code
|
||||
Local<String> code;
|
||||
CHECK(args[0]->IsString());
|
||||
code = args[0].As<String>();
|
||||
|
||||
bool should_retry_as_esm =
|
||||
ContextifyContext::ShouldRetryAsESMInternal(env, code);
|
||||
|
||||
args.GetReturnValue().Set(should_retry_as_esm);
|
||||
}
|
||||
|
||||
bool ContextifyContext::ShouldRetryAsESMInternal(Environment* env,
|
||||
Local<String> code) {
|
||||
Isolate* isolate = env->isolate();
|
||||
|
||||
Local<String> script_id =
|
||||
FIXED_ONE_BYTE_STRING(isolate, "[retry_as_esm_check]");
|
||||
Local<Symbol> id_symbol = Symbol::New(isolate, script_id);
|
||||
|
||||
Local<PrimitiveArray> host_defined_options =
|
||||
GetHostDefinedOptions(isolate, id_symbol);
|
||||
ScriptCompiler::Source source =
|
||||
GetCommonJSSourceInstance(isolate,
|
||||
code,
|
||||
script_id, // filename
|
||||
0, // line offset
|
||||
0, // column offset
|
||||
host_defined_options,
|
||||
nullptr); // cached_data
|
||||
|
||||
TryCatchScope try_catch(env);
|
||||
ShouldNotAbortOnUncaughtScope no_abort_scope(env);
|
||||
|
||||
// Try parsing where instead of the CommonJS wrapper we use an async function
|
||||
// wrapper. If the parse succeeds, then any CommonJS parse error for this
|
||||
// module was caused by either a top-level declaration of one of the CommonJS
|
||||
// module variables, or a top-level `await`.
|
||||
code = String::Concat(
|
||||
isolate, FIXED_ONE_BYTE_STRING(isolate, "(async function() {"), code);
|
||||
code = String::Concat(isolate, code, FIXED_ONE_BYTE_STRING(isolate, "})();"));
|
||||
|
||||
ScriptCompiler::Source wrapped_source = GetCommonJSSourceInstance(
|
||||
isolate, code, script_id, 0, 0, host_defined_options, nullptr);
|
||||
|
||||
Local<Context> context = env->context();
|
||||
std::vector<Local<String>> params = GetCJSParameters(env->isolate_data());
|
||||
USE(ScriptCompiler::CompileFunction(
|
||||
context,
|
||||
&wrapped_source,
|
||||
params.size(),
|
||||
params.data(),
|
||||
0,
|
||||
nullptr,
|
||||
ScriptCompiler::kNoCompileOptions,
|
||||
v8::ScriptCompiler::NoCacheReason::kNoCacheNoReason));
|
||||
|
||||
if (!try_catch.HasTerminated()) {
|
||||
if (try_catch.HasCaught()) {
|
||||
// If on the second parse an error is thrown by ESM syntax, then
|
||||
// what happened was that the user had top-level `await` or a
|
||||
// top-level declaration of one of the CommonJS module variables
|
||||
// above the first `import` or `export`.
|
||||
Utf8Value message_value(env->isolate(), try_catch.Message()->Get());
|
||||
auto message_view = message_value.ToStringView();
|
||||
for (const auto& error_message : esm_syntax_error_messages) {
|
||||
if (message_view.find(error_message) != std::string_view::npos) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// No errors thrown in the second parse, so most likely the error
|
||||
// was caused by a top-level `await` or a top-level declaration of
|
||||
// one of the CommonJS module variables.
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static void CompileFunctionForCJSLoader(
|
||||
const FunctionCallbackInfo<Value>& args) {
|
||||
CHECK(args[0]->IsString());
|
||||
@@ -1724,6 +1730,7 @@ static void CreatePerContextProperties(Local<Object> target,
|
||||
Local<Object> constants = Object::New(env->isolate());
|
||||
Local<Object> measure_memory = Object::New(env->isolate());
|
||||
Local<Object> memory_execution = Object::New(env->isolate());
|
||||
Local<Object> syntax_detection_errors = Object::New(env->isolate());
|
||||
|
||||
{
|
||||
Local<Object> memory_mode = Object::New(env->isolate());
|
||||
@@ -1744,6 +1751,25 @@ static void CreatePerContextProperties(Local<Object> target,
|
||||
|
||||
READONLY_PROPERTY(constants, "measureMemory", measure_memory);
|
||||
|
||||
{
|
||||
Local<Value> esm_syntax_error_messages_array =
|
||||
ToV8Value(context, esm_syntax_error_messages).ToLocalChecked();
|
||||
READONLY_PROPERTY(syntax_detection_errors,
|
||||
"esmSyntaxErrorMessages",
|
||||
esm_syntax_error_messages_array);
|
||||
}
|
||||
|
||||
{
|
||||
Local<Value> throws_only_in_cjs_error_messages_array =
|
||||
ToV8Value(context, throws_only_in_cjs_error_messages).ToLocalChecked();
|
||||
READONLY_PROPERTY(syntax_detection_errors,
|
||||
"throwsOnlyInCommonJSErrorMessages",
|
||||
throws_only_in_cjs_error_messages_array);
|
||||
}
|
||||
|
||||
READONLY_PROPERTY(
|
||||
constants, "syntaxDetectionErrors", syntax_detection_errors);
|
||||
|
||||
target->Set(context, env->constants_string(), constants).Check();
|
||||
}
|
||||
|
||||
|
||||
@@ -106,6 +106,9 @@ class ContextifyContext : public BaseObject {
|
||||
const v8::ScriptCompiler::Source& source);
|
||||
static void ContainsModuleSyntax(
|
||||
const v8::FunctionCallbackInfo<v8::Value>& args);
|
||||
static void ShouldRetryAsESM(const v8::FunctionCallbackInfo<v8::Value>& args);
|
||||
static bool ShouldRetryAsESMInternal(Environment* env,
|
||||
v8::Local<v8::String> code);
|
||||
static void WeakCallback(
|
||||
const v8::WeakCallbackInfo<ContextifyContext>& data);
|
||||
static void PropertyGetterCallback(
|
||||
|
||||
Reference in New Issue
Block a user