diff --git a/.bazelrc b/.bazelrc index 325819653b2..4ad2a1ffa97 100644 --- a/.bazelrc +++ b/.bazelrc @@ -266,9 +266,10 @@ build:mkl_aarch64 -c opt build:mkl_aarch64_threadpool --define=build_with_mkl_aarch64=true build:mkl_aarch64_threadpool -c opt -# Default CUDA and CUDNN versions. +# Default CUDA, CUDNN and NVSHMEM versions. build:cuda_version --repo_env=HERMETIC_CUDA_VERSION="12.5.1" build:cuda_version --repo_env=HERMETIC_CUDNN_VERSION="9.3.0" +build:cuda_version --repo_env=HERMETIC_NVSHMEM_VERSION="3.2.5" # CUDA: This config refers to building CUDA op kernels with nvcc. build:cuda --repo_env TF_NEED_CUDA=1 @@ -280,6 +281,7 @@ build:cuda --@local_config_cuda//cuda:include_cuda_libs=true # This configuration is used for building the wheels. build:cuda_wheel --@local_config_cuda//cuda:include_cuda_libs=false +build:cuda_wheel --@local_config_nvshmem//:include_nvshmem_libs=false # CUDA: This config refers to building CUDA op kernels with clang. build:cuda_clang --config=cuda diff --git a/WORKSPACE b/WORKSPACE index 43a224e1df9..5fc07a20385 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -138,3 +138,30 @@ load( ) nccl_configure(name = "local_config_nccl") + +load( + "@local_xla//third_party/nvshmem/hermetic:nvshmem_json_init_repository.bzl", + "nvshmem_json_init_repository", +) + +nvshmem_json_init_repository() + +load( + "@nvshmem_redist_json//:distributions.bzl", + "NVSHMEM_REDISTRIBUTIONS", +) +load( + "@local_xla//third_party/nvshmem/hermetic:nvshmem_redist_init_repository.bzl", + "nvshmem_redist_init_repository", +) + +nvshmem_redist_init_repository( + nvshmem_redistributions = NVSHMEM_REDISTRIBUTIONS, +) + +load( + "@local_xla//third_party/nvshmem/hermetic:nvshmem_configure.bzl", + "nvshmem_configure", +) + +nvshmem_configure(name = "local_config_nvshmem") diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 995156cdde6..8cf5a53dd34 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -1091,6 +1091,7 @@ bzl_library( 
"@local_xla//third_party/llvm_openmp:openmp_bzl", "@local_xla//third_party/py/rules_pywrap:pywrap_bzl", "@local_xla//xla/tsl:tsl_bzl", + "@local_xla//xla/tsl:tsl_default_bzl", "@local_xla//xla/tsl/mkl:build_defs_bzl", "@rules_java//java:rules", ], diff --git a/tensorflow/core/common_runtime/gpu/BUILD b/tensorflow/core/common_runtime/gpu/BUILD index b65e5f720ed..eb88a66160e 100644 --- a/tensorflow/core/common_runtime/gpu/BUILD +++ b/tensorflow/core/common_runtime/gpu/BUILD @@ -1,7 +1,7 @@ load("@bazel_skylib//lib:selects.bzl", "selects") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm") -load("@local_xla//xla/tsl:tsl.bzl", "if_cuda_libs") +load("@local_xla//xla/tsl:tsl.default.bzl", "if_cuda_libs") load( "//tensorflow:tensorflow.bzl", "clean_dep", diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 744e01823c1..c8efcc6b0ba 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -72,10 +72,13 @@ load( _cc_header_only_library = "cc_header_only_library", _custom_op_cc_header_only_library = "custom_op_cc_header_only_library", _if_cuda_or_rocm = "if_cuda_or_rocm", - _if_cuda_tools = "if_cuda_tools", _if_nccl = "if_nccl", _transitive_hdrs = "transitive_hdrs", ) +load( + "@local_xla//xla/tsl:tsl.default.bzl", + _if_cuda_tools = "if_cuda_tools", +) load( "@local_config_tensorrt//:build_defs.bzl", "if_tensorrt", diff --git a/tensorflow/tools/pip_package/utils/tf_wheel.bzl b/tensorflow/tools/pip_package/utils/tf_wheel.bzl index c4dcd4682c8..fa66b202821 100644 --- a/tensorflow/tools/pip_package/utils/tf_wheel.bzl +++ b/tensorflow/tools/pip_package/utils/tf_wheel.bzl @@ -77,11 +77,18 @@ def _is_dest_file(basename, dest_files_suffixes): def _tf_wheel_impl(ctx): include_cuda_libs = ctx.attr.include_cuda_libs[BuildSettingInfo].value override_include_cuda_libs = ctx.attr.override_include_cuda_libs[BuildSettingInfo].value + include_nvshmem_libs = 
ctx.attr.include_nvshmem_libs[BuildSettingInfo].value + override_include_nvshmem_libs = ctx.attr.override_include_nvshmem_libs[BuildSettingInfo].value if include_cuda_libs and not override_include_cuda_libs: fail("TF wheel shouldn't be built with CUDA dependencies." + " Please provide `--config=cuda_wheel` for bazel build command." + " If you absolutely need to add CUDA dependencies, provide" + " `--@local_config_cuda//cuda:override_include_cuda_libs=true`.") + if include_nvshmem_libs and not override_include_nvshmem_libs: + fail("TF wheel shouldn't be built directly against the NVSHMEM libraries." + + " Please provide `--config=cuda_wheel` for bazel build command." + + " If you absolutely need to link directly against the NVSHMEM libraries," + + " provide `--@local_config_nvshmem//:override_include_nvshmem_libs=true`.") executable = ctx.executable.wheel_binary full_wheel_version = (TF_VERSION + TF_WHEEL_VERSION_SUFFIX) @@ -147,6 +154,8 @@ tf_wheel = rule( ), "include_cuda_libs": attr.label(default = Label("@local_config_cuda//cuda:include_cuda_libs")), "override_include_cuda_libs": attr.label(default = Label("@local_config_cuda//cuda:override_include_cuda_libs")), + "include_nvshmem_libs": attr.label(default = Label("@local_config_nvshmem//:include_nvshmem_libs")), + "override_include_nvshmem_libs": attr.label(default = Label("@local_config_nvshmem//:override_include_nvshmem_libs")), "platform_tag": attr.string(mandatory = True), "platform_name": attr.string(mandatory = True), }, diff --git a/third_party/xla/WORKSPACE b/third_party/xla/WORKSPACE index fb250a66dac..2ed3b4fb5c2 100644 --- a/third_party/xla/WORKSPACE +++ b/third_party/xla/WORKSPACE @@ -99,3 +99,30 @@ load( ) nccl_configure(name = "local_config_nccl") + +load( + "//third_party/nvshmem/hermetic:nvshmem_json_init_repository.bzl", + "nvshmem_json_init_repository", +) + +nvshmem_json_init_repository() + +load( + "@nvshmem_redist_json//:distributions.bzl", + "NVSHMEM_REDISTRIBUTIONS", +) +load( + 
"//third_party/nvshmem/hermetic:nvshmem_redist_init_repository.bzl", + "nvshmem_redist_init_repository", +) + +nvshmem_redist_init_repository( + nvshmem_redistributions = NVSHMEM_REDISTRIBUTIONS, +) + +load( + "//third_party/nvshmem/hermetic:nvshmem_configure.bzl", + "nvshmem_configure", +) + +nvshmem_configure(name = "local_config_nvshmem") diff --git a/third_party/xla/tensorflow.bazelrc b/third_party/xla/tensorflow.bazelrc index 5e42e49dcab..c1611786c8e 100644 --- a/third_party/xla/tensorflow.bazelrc +++ b/third_party/xla/tensorflow.bazelrc @@ -162,9 +162,10 @@ build:mkl_aarch64 -c opt build:mkl_aarch64_threadpool --define=build_with_mkl_aarch64=true build:mkl_aarch64_threadpool -c opt -# Default CUDA and CUDNN versions. +# Default CUDA, CUDNN and NVSHMEM versions. build:cuda_version --repo_env=HERMETIC_CUDA_VERSION="12.6.3" build:cuda_version --repo_env=HERMETIC_CUDNN_VERSION="9.3.0" +build:cuda_version --repo_env=HERMETIC_NVSHMEM_VERSION="3.2.5" # CUDA: This config refers to building CUDA op kernels with nvcc. build:cuda --repo_env TF_NEED_CUDA=1 @@ -176,6 +177,7 @@ build:cuda --@local_config_cuda//cuda:include_cuda_libs=true # This configuration is used for building the wheels. build:cuda_wheel --@local_config_cuda//cuda:include_cuda_libs=false +build:cuda_wheel --@local_config_nvshmem//:include_nvshmem_libs=false # CUDA: This config refers to building CUDA op kernels with clang. 
build:cuda_clang --config=cuda diff --git a/third_party/xla/third_party/nvshmem/hermetic/nvshmem_configure.bzl b/third_party/xla/third_party/nvshmem/hermetic/nvshmem_configure.bzl index 9b506e04a74..94f6631edb6 100644 --- a/third_party/xla/third_party/nvshmem/hermetic/nvshmem_configure.bzl +++ b/third_party/xla/third_party/nvshmem/hermetic/nvshmem_configure.bzl @@ -26,30 +26,64 @@ load( ) NVSHMEM_ENABLED_BUILD_CONTENT = """ +load("@bazel_skylib//lib:selects.bzl", "selects") load("@bazel_skylib//rules:common_settings.bzl", "bool_flag", "bool_setting") + +# This set of flags and config_settings is needed to enable NVSHMEM dependencies +# separately from CUDA dependencies. The reason is that NVSHMEM libraries +# require GLIBC 2.28 and above, which we don't have on RBE runners yet. +# TODO(ybaturina): Remove this once GLIBC 2.28 is available on RBE. bool_flag( name = "include_nvshmem_libs", build_setting_default = False, + visibility = ["//visibility:public"], ) config_setting( name = "nvshmem_libs", flag_values = {":include_nvshmem_libs": "True"}, + visibility = ["//visibility:private"], ) -bool_setting( - name = "true_setting", - visibility = ["//visibility:private"], - build_setting_default = True, +bool_flag( + name = "override_include_nvshmem_libs", + build_setting_default = False, + visibility = ["//visibility:public"], ) config_setting( + name = "overrided_nvshmem_libs", + flag_values = {":override_include_nvshmem_libs": "True"}, + visibility = ["//visibility:private"], +) + +alias( name = "nvshmem_tools", - flag_values = {":true_setting": "True"}, + actual = "@local_config_cuda//:is_cuda_enabled", + visibility = ["//visibility:public"], +) + +selects.config_setting_group( + name = "any_nvshmem_libs", + match_any = [ + ":nvshmem_libs", + ":overrided_nvshmem_libs", + ], + visibility = ["//visibility:private"], +) + +selects.config_setting_group( + name = "nvshmem_tools_and_libs", + match_all = [ + ":any_nvshmem_libs", + ":nvshmem_tools", + ], + visibility = 
["//visibility:public"], ) """ NVSHMEM_DISABLED_BUILD_CONTENT = """ +load("@bazel_skylib//lib:selects.bzl", "selects") load("@bazel_skylib//rules:common_settings.bzl", "bool_flag", "bool_setting") bool_setting( @@ -58,14 +92,52 @@ bool_setting( build_setting_default = True, ) +bool_flag( + name = "include_nvshmem_libs", + build_setting_default = False, + visibility = ["//visibility:public"], +) + config_setting( name = "nvshmem_tools", flag_values = {":true_setting": "False"}, + visibility = ["//visibility:public"], ) config_setting( name = "nvshmem_libs", flag_values = {":true_setting": "False"}, + visibility = ["//visibility:private"], +) + +bool_flag( + name = "override_include_nvshmem_libs", + build_setting_default = False, + visibility = ["//visibility:public"], +) + +config_setting( + name = "overrided_nvshmem_libs", + flag_values = {":true_setting": "False"}, + visibility = ["//visibility:private"], +) + +selects.config_setting_group( + name = "any_nvshmem_libs", + match_any = [ + ":nvshmem_libs", + ":overrided_nvshmem_libs" + ], + visibility = ["//visibility:private"], +) + +selects.config_setting_group( + name = "nvshmem_tools_and_libs", + match_all = [ + ":any_nvshmem_libs", + ":nvshmem_tools" + ], + visibility = ["//visibility:public"], ) """ diff --git a/third_party/xla/xla/BUILD b/third_party/xla/xla/BUILD index 9084719ee2e..6de20dbf882 100644 --- a/third_party/xla/xla/BUILD +++ b/third_party/xla/xla/BUILD @@ -1454,6 +1454,7 @@ bzl_library( "@bazel_skylib//lib:paths", "//xla/tsl:package_groups_bzl", "//xla/tsl:tsl_bzl", + "//xla/tsl:tsl_default_bzl", "//xla/tsl/platform/default:cuda_build_defs_bzl", ], ) diff --git a/third_party/xla/xla/lit.bzl b/third_party/xla/xla/lit.bzl index 22c519dedd9..3a7340e5ab9 100644 --- a/third_party/xla/xla/lit.bzl +++ b/third_party/xla/xla/lit.bzl @@ -3,7 +3,8 @@ load("@bazel_skylib//lib:paths.bzl", "paths") load("@rules_python//python:defs.bzl", "py_binary") load("//xla/tsl:package_groups.bzl", "DEFAULT_LOAD_VISIBILITY") 
-load("//xla/tsl:tsl.bzl", "if_cuda_tools", "if_google", "if_oss") +load("//xla/tsl:tsl.bzl", "if_google", "if_oss") +load("//xla/tsl:tsl.default.bzl", "if_cuda_tools") load("//xla/tsl/platform/default:cuda_build_defs.bzl", "if_cuda_is_configured") visibility(DEFAULT_LOAD_VISIBILITY) diff --git a/third_party/xla/xla/stream_executor/cuda/BUILD b/third_party/xla/xla/stream_executor/cuda/BUILD index 3fd17916fcc..c5676515b3b 100644 --- a/third_party/xla/xla/stream_executor/cuda/BUILD +++ b/third_party/xla/xla/stream_executor/cuda/BUILD @@ -14,14 +14,19 @@ load( load("//xla/tests:build_defs.bzl", "xla_test") load( "//xla/tsl:tsl.bzl", - # copybara:comment_begin - "if_cuda_tools", - # copybara:comment_end "if_google", "if_windows", "internal_visibility", "tsl_copts", ) + +# copybara:comment_begin +load( + "//xla/tsl:tsl.default.bzl", + "if_cuda_tools", +) + +# copybara:comment_end load("//xla/tsl/platform:build_config.bzl", "tf_proto_library") load( "//xla/tsl/platform:build_config_root.bzl", diff --git a/third_party/xla/xla/tsl/BUILD b/third_party/xla/xla/tsl/BUILD index a230f93e9e5..28f6a3cf48d 100644 --- a/third_party/xla/xla/tsl/BUILD +++ b/third_party/xla/xla/tsl/BUILD @@ -553,6 +553,16 @@ bzl_library( visibility = ["//xla:__subpackages__"], ) +bzl_library( + name = "tsl_default_bzl", + srcs = if_oss(["tsl.default.bzl"]), + visibility = ["//visibility:public"], + deps = [ + ":package_groups_bzl", + ":tsl_bzl", + ], +) + # copybara:comment_begin(oss-only) cc_library( name = "grpc++", diff --git a/third_party/xla/xla/tsl/cuda/BUILD.bazel b/third_party/xla/xla/tsl/cuda/BUILD.bazel index fdcc3413e2c..b6f70cdbe5a 100644 --- a/third_party/xla/xla/tsl/cuda/BUILD.bazel +++ b/third_party/xla/xla/tsl/cuda/BUILD.bazel @@ -11,7 +11,7 @@ load( "if_cuda_is_configured", ) load( - "//xla/tsl:tsl.bzl", + "//xla/tsl:tsl.default.bzl", "if_cuda_libs", ) load("//xla/tsl/cuda:stub.bzl", "cuda_stub") diff --git a/third_party/xla/xla/tsl/platform/default/BUILD 
b/third_party/xla/xla/tsl/platform/default/BUILD index 98a7f14631a..acf419ba854 100644 --- a/third_party/xla/xla/tsl/platform/default/BUILD +++ b/third_party/xla/xla/tsl/platform/default/BUILD @@ -3,14 +3,18 @@ load("@bazel_skylib//:bzl_library.bzl", "bzl_library") load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm_is_configured") load( "//xla/tsl:tsl.bzl", - "if_cuda_tools", "if_not_fuchsia", "if_not_windows", "if_oss", "internal_visibility", "tsl_copts", ) -load("//xla/tsl:tsl.default.bzl", "filegroup", "tsl_grpc_cc_dependencies") +load( + "//xla/tsl:tsl.default.bzl", + "filegroup", + "if_cuda_tools", + "tsl_grpc_cc_dependencies", +) load("//xla/tsl/platform:rules_cc.bzl", "cc_library") package( diff --git a/third_party/xla/xla/tsl/tsl.bzl b/third_party/xla/xla/tsl/tsl.bzl index 3fec637a595..b0f80338fbd 100644 --- a/third_party/xla/xla/tsl/tsl.bzl +++ b/third_party/xla/xla/tsl/tsl.bzl @@ -233,16 +233,6 @@ def if_with_tpu_support(if_true, if_false = []): "//conditions:default": if_false, }) -# These configs are used to determine whether we should use CUDA tools and libs in cc_libraries. -# They are intended for the OSS builds only. 
-def if_cuda_tools(if_true, if_false = []): # buildifier: disable=unused-variable - """Shorthand for select()'ing on whether we're building with hCUDA tools.""" - return select({"@local_config_cuda//cuda:cuda_tools": if_true, "//conditions:default": if_false}) # copybara:comment_replace return if_false - -def if_cuda_libs(if_true, if_false = []): # buildifier: disable=unused-variable - """Shorthand for select()'ing on whether we need to include hermetic CUDA libraries.""" - return select({"@local_config_cuda//cuda:cuda_tools_and_libs": if_true, "//conditions:default": if_false}) # copybara:comment_replace return if_false - def get_win_copts(is_external = False): WINDOWS_COPTS = [ # copybara:uncomment_begin(no MSVC flags in google) diff --git a/third_party/xla/xla/tsl/tsl.default.bzl b/third_party/xla/xla/tsl/tsl.default.bzl index b746c2ca9c4..baa1d5256de 100644 --- a/third_party/xla/xla/tsl/tsl.default.bzl +++ b/third_party/xla/xla/tsl/tsl.default.bzl @@ -27,3 +27,34 @@ tsl_pybind_extension = _tsl_pybind_extension tsl_google_bzl_deps = _tsl_google_bzl_deps tsl_extra_config_settings = _tsl_extra_config_settings tsl_extra_config_settings_targets = _tsl_extra_config_settings_targets + +# These configs are used to determine whether we should use CUDA/NVSHMEM tools and libs in +# cc_libraries. +# They are intended for the OSS builds only. 
+def if_cuda_tools(if_true, if_false = []): # buildifier: disable=unused-variable + """Shorthand for select()'ing on whether we're building with hermetic CUDA tools.""" + return select({ + "@local_config_cuda//cuda:cuda_tools": if_true, + "//conditions:default": if_false, + }) + +def if_cuda_libs(if_true, if_false = []): # buildifier: disable=unused-variable + """Shorthand for select()'ing on whether we need to include hermetic CUDA libraries.""" + return select({ + "@local_config_cuda//cuda:cuda_tools_and_libs": if_true, + "//conditions:default": if_false, + }) + +def if_nvshmem_tools(if_true, if_false = []): # buildifier: disable=unused-variable + """Shorthand for select()'ing on whether we're building with hermetic NVSHMEM tools.""" + return select({ + "@local_config_nvshmem//:nvshmem_tools": if_true, + "//conditions:default": if_false, + }) + +def if_nvshmem_libs(if_true, if_false = []): # buildifier: disable=unused-variable + """Shorthand for select()'ing on whether we need to include hermetic NVSHMEM libraries.""" + return select({ + "@local_config_nvshmem//:nvshmem_tools_and_libs": if_true, + "//conditions:default": if_false, + })