mirror of
https://github.com/zebrajr/tensorflow.git
synced 2026-01-15 12:15:41 +00:00
[XLA:GPU] Short-circuit compilation of no-op IR -> empty PTX.
There's no point in constructing and running the LLVM pipeline if we know that there are no kernels in the IR we've generated for the given HLO op. This is often the case for ops we can optimize away at the HLO level. PiperOrigin-RevId: 174072540
This commit is contained in:
committed by
TensorFlower Gardener
parent
c911d0f169
commit
66fc99a3b5
@@ -342,6 +342,13 @@ StatusOr<string> CompileModuleToPtx(llvm::Module* module,
|
||||
std::pair<int, int> compute_capability,
|
||||
const HloModuleConfig& hlo_module_config,
|
||||
const string& libdevice_dir_path) {
|
||||
// If the module has no functions or globals, there's nothing to compile. Just
|
||||
// return an empty string.
|
||||
if (module->empty() && module->global_empty()) {
|
||||
VLOG(2) << "Module '" << llvm_ir::AsString(module->getName())
|
||||
<< "' is empty. Skipping compilation.";
|
||||
return string();
|
||||
}
|
||||
// Link the input module with libdevice, to pull in implementations of some
|
||||
// builtins.
|
||||
TF_RETURN_IF_ERROR(
|
||||
|
||||
Reference in New Issue
Block a user