Prefer cubin over PTX when we launch CUDA kernels.

Native GPU code, if we have it, should be preferred over JIT compilation of PTX.

PiperOrigin-RevId: 174110646
2026-01-15 12:15:41 +00:00 · 2017-10-31 16:25:00 -07:00
parent 2ccf3aba42
commit 8e732a3124
1 changed file with 15 additions and 15 deletions
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -234,6 +234,21 @@ bool CUDAExecutor::GetKernel(const MultiKernelLoaderSpec &spec,
  if (on_disk_spec != nullptr) {
    LOG(WARNING) << "loading CUDA kernel from disk is not supported";
    return false;
+  } else if (spec.has_cuda_cubin_in_memory()) {
+    kernelname = &spec.cuda_cubin_in_memory().kernelname();
+    const char *cubin = spec.cuda_cubin_in_memory().bytes();
+    mutex_lock lock{in_memory_modules_mu_};
+    module = in_memory_modules_[cubin];
+
+    if (module == nullptr) {
+      auto load_status = CUDADriver::LoadCubin(context_, cubin, &module);
+      if (!load_status.ok()) {
+        LOG(ERROR) << "failed to load CUBIN: " << load_status;
+        return false;
+      }
+
+      in_memory_modules_[cubin] = module;
+    }
  } else if (spec.has_cuda_ptx_in_memory()) {
    kernelname = &spec.cuda_ptx_in_memory().kernelname();

@@ -276,21 +291,6 @@ bool CUDAExecutor::GetKernel(const MultiKernelLoaderSpec &spec,
      }
      in_memory_modules_[orig_ptx] = module;
    }
-  } else if (spec.has_cuda_cubin_in_memory()) {
-    kernelname = &spec.cuda_cubin_in_memory().kernelname();
-    const char *cubin = spec.cuda_cubin_in_memory().bytes();
-    mutex_lock lock{in_memory_modules_mu_};
-    module = in_memory_modules_[cubin];
-
-    if (module == nullptr) {
-      auto load_status = CUDADriver::LoadCubin(context_, cubin, &module);
-      if (!load_status.ok()) {
-        LOG(ERROR) << "failed to load CUBIN: " << load_status;
-        return false;
-      }
-
-      in_memory_modules_[cubin] = module;
-    }
  } else {
    LOG(WARNING) << "no method of loading CUDA kernel provided";
    return false;