tensorflow/third_party/nccl/nccl_configure.bzl
A. Unique TensorFlower · 9b5fa66dc6 · Introduce hermetic CUDA in Google ML projects.
1) Hermetic CUDA rules allow building wheels with GPU support on a machine without GPUs, as well as running Bazel GPU tests on a machine with only GPUs and the NVIDIA driver installed; no locally installed CUDA toolkit is required. When `--config=cuda` is provided in the Bazel options, Bazel downloads the CUDA, CUDNN and NCCL redistributions into its cache and uses them during the build and test phases. (A sample invocation follows the list of download locations below.)

    [Default location of CUDNN redistributions](https://developer.download.nvidia.com/compute/cudnn/redist/)

    [Default location of CUDA redistributions](https://developer.download.nvidia.com/compute/cuda/redist/)

    [Default location of NCCL redistributions](https://pypi.org/project/nvidia-nccl-cu12/#history)
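
    For illustration, a build using these rules might look like the following sketch. The `HERMETIC_CUDA_VERSION`/`HERMETIC_CUDNN_VERSION` repository environment variables and the wheel target are assumptions about a typical setup, not something this change prescribes:

    ```
    # Build a GPU-enabled wheel on a machine without GPUs
    # (version numbers are illustrative):
    bazel build --config=cuda \
        --repo_env=HERMETIC_CUDA_VERSION=12.3.1 \
        --repo_env=HERMETIC_CUDNN_VERSION=9.1.1 \
        //tensorflow/tools/pip_package:wheel
    ```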

2) To include hermetic CUDA rules in your project, add the following to the WORKSPACE file of the downstream project that depends on XLA (a note on the ordering follows the snippet).

   Note: use `@local_tsl` instead of `@tsl` in the TensorFlow project.

   ```
   load(
      "@tsl//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl",
      "cuda_json_init_repository",
   )

   cuda_json_init_repository()

   load(
      "@cuda_redist_json//:distributions.bzl",
      "CUDA_REDISTRIBUTIONS",
      "CUDNN_REDISTRIBUTIONS",
   )
   load(
      "@tsl//third_party/gpus/cuda/hermetic:cuda_redist_init_repositories.bzl",
      "cuda_redist_init_repositories",
      "cudnn_redist_init_repository",
   )

   cuda_redist_init_repositories(
      cuda_redistributions = CUDA_REDISTRIBUTIONS,
   )

   cudnn_redist_init_repository(
      cudnn_redistributions = CUDNN_REDISTRIBUTIONS,
   )

   load(
      "@tsl//third_party/gpus/cuda/hermetic:cuda_configure.bzl",
      "cuda_configure",
   )

   cuda_configure(name = "local_config_cuda")

   load(
      "@tsl//third_party/nccl/hermetic:nccl_redist_init_repository.bzl",
      "nccl_redist_init_repository",
   )

   nccl_redist_init_repository()

   load(
      "@tsl//third_party/nccl/hermetic:nccl_configure.bzl",
      "nccl_configure",
   )

   nccl_configure(name = "local_config_nccl")
   ```
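
   A note on ordering: unlike `.bzl` files, WORKSPACE files allow `load` statements to appear after rule invocations, which is why the snippet above interleaves them; each generated repository (e.g. `@cuda_redist_json`) must be created by the preceding call before anything can be loaded from it.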

PiperOrigin-RevId: 662981325
2024-08-14 11:47:44 -07:00


"""Repository rule for NCCL configuration.
NB: DEPRECATED! Use `hermetic/nccl_configure` rule instead.
`nccl_configure` depends on the following environment variables:
* `TF_NCCL_VERSION`: Installed NCCL version or empty to build from source.
* `NCCL_INSTALL_PATH` (deprecated): The installation path of the NCCL library.
* `NCCL_HDR_PATH` (deprecated): The installation path of the NCCL header
files.
* `TF_CUDA_PATHS`: The base paths to look for CUDA and cuDNN. Default is
`/usr/local/cuda,usr/`.
* `TF_NCCL_USE_STUB`: "1" if a NCCL stub that loads NCCL dynamically should
be used, "0" if NCCL should be linked in statically.
"""
load(
    "//third_party/gpus:cuda_configure.bzl",
    "enable_cuda",
    "find_cuda_config",
)
load(
    "//third_party/remote_config:common.bzl",
    "config_repo_label",
    "get_cpu_value",
    "get_host_environ",
)

_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
_NCCL_HDR_PATH = "NCCL_HDR_PATH"
_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH"
_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES"
_TF_NCCL_VERSION = "TF_NCCL_VERSION"
_TF_NEED_CUDA = "TF_NEED_CUDA"
_TF_CUDA_PATHS = "TF_CUDA_PATHS"
_TF_NCCL_USE_STUB = "TF_NCCL_USE_STUB"
_DEFINE_NCCL_MAJOR = "#define NCCL_MAJOR"
_DEFINE_NCCL_MINOR = "#define NCCL_MINOR"
_DEFINE_NCCL_PATCH = "#define NCCL_PATCH"
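
# BUILD content used when CUDA is disabled or the platform is unsupported:
# empty targets keep `bazel query` working without a real NCCL dependency.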
_NCCL_DUMMY_BUILD_CONTENT = """
filegroup(
  name = "LICENSE",
  visibility = ["//visibility:public"],
)

cc_library(
  name = "nccl",
  visibility = ["//visibility:public"],
)

cc_library(
  name = "nccl_config",
  hdrs = ["nccl_config.h"],
  include_prefix = "third_party/nccl",
  visibility = ["//visibility:public"],
)
"""
_NCCL_ARCHIVE_BUILD_CONTENT = """
filegroup(
  name = "LICENSE",
  data = ["@nccl_archive//:LICENSE.txt"],
  visibility = ["//visibility:public"],
)

alias(
  name = "nccl",
  actual = "@nccl_archive//:nccl",
  visibility = ["//visibility:public"],
)

alias(
  name = "nccl_config",
  actual = "@nccl_archive//:nccl_config",
  visibility = ["//visibility:public"],
)
"""
_NCCL_ARCHIVE_STUB_BUILD_CONTENT = """
filegroup(
  name = "LICENSE",
  data = ["@nccl_archive//:LICENSE.txt"],
  visibility = ["//visibility:public"],
)

alias(
  name = "nccl",
  actual = "@nccl_archive//:nccl_via_stub",
  visibility = ["//visibility:public"],
)

alias(
  name = "nccl_headers",
  actual = "@nccl_archive//:nccl_headers",
  visibility = ["//visibility:public"],
)

alias(
  name = "nccl_config",
  actual = "@nccl_archive//:nccl_config",
  visibility = ["//visibility:public"],
)
"""

def _label(file):
    return Label("//third_party/nccl:{}".format(file))

def _create_local_nccl_repository(repository_ctx):
    nccl_version = get_host_environ(repository_ctx, _TF_NCCL_VERSION, "")
    if nccl_version:
        nccl_version = nccl_version.split(".")[0]

    cuda_config = find_cuda_config(repository_ctx, ["cuda"])
    cuda_version = cuda_config["cuda_version"].split(".")

    if nccl_version == "":
        # Alias to open source build from @nccl_archive.
        if get_host_environ(repository_ctx, _TF_NCCL_USE_STUB, "0") == "0":
            repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT)
        else:
            repository_ctx.file("BUILD", _NCCL_ARCHIVE_STUB_BUILD_CONTENT)

        repository_ctx.template("generated_names.bzl", _label("generated_names.bzl.tpl"), {})
        repository_ctx.template(
            "build_defs.bzl",
            _label("build_defs.bzl.tpl"),
            {
                "%{cuda_version}": "(%s, %s)" % tuple(cuda_version),
                "%{nvlink_label}": "@local_config_cuda//cuda:cuda/bin/nvlink",
                "%{fatbinary_label}": "@local_config_cuda//cuda:cuda/bin/fatbinary",
                "%{bin2c_label}": "@local_config_cuda//cuda:cuda/bin/bin2c",
                "%{link_stub_label}": "@local_config_cuda//cuda:cuda/bin/crt/link.stub",
                "%{nvprune_label}": "@local_config_cuda//cuda:cuda/bin/nvprune",
            },
        )
    else:
        # Create target for locally installed NCCL.
        config = find_cuda_config(repository_ctx, ["nccl"])
        config_wrap = {
            "%{nccl_version}": config["nccl_version"],
            "%{nccl_header_dir}": config["nccl_include_dir"],
            "%{nccl_library_dir}": config["nccl_library_dir"],
        }
        repository_ctx.template("BUILD", _label("system.BUILD.tpl"), config_wrap)
        repository_ctx.template("generated_names.bzl", _label("generated_names.bzl.tpl"), {})
def _create_remote_nccl_repository(repository_ctx, remote_config_repo):
    repository_ctx.template(
        "BUILD",
        config_repo_label(remote_config_repo, ":BUILD"),
        {},
    )

    nccl_version = get_host_environ(repository_ctx, _TF_NCCL_VERSION, "")
    if nccl_version == "":
        repository_ctx.template(
            "generated_names.bzl",
            config_repo_label(remote_config_repo, ":generated_names.bzl"),
            {},
        )
        repository_ctx.template(
            "build_defs.bzl",
            config_repo_label(remote_config_repo, ":build_defs.bzl"),
            {},
        )

def _nccl_autoconf_impl(repository_ctx):
    if (not enable_cuda(repository_ctx) or
        get_cpu_value(repository_ctx) not in ("Linux", "FreeBSD")):
        # Add a dummy build file to make bazel query happy.
        repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT)
        repository_ctx.file("nccl_config.h", "#define TF_NCCL_VERSION \"\"")
    elif get_host_environ(repository_ctx, "TF_NCCL_CONFIG_REPO") != None:
        _create_remote_nccl_repository(repository_ctx, get_host_environ(repository_ctx, "TF_NCCL_CONFIG_REPO"))
    else:
        _create_local_nccl_repository(repository_ctx)

_ENVIRONS = [
    _CUDA_TOOLKIT_PATH,
    _NCCL_HDR_PATH,
    _NCCL_INSTALL_PATH,
    _TF_NCCL_VERSION,
    _TF_CUDA_COMPUTE_CAPABILITIES,
    _TF_NEED_CUDA,
    _TF_CUDA_PATHS,
]
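
# `remotable = True` allows this rule to run as part of remote execution; it
# always probes the executing machine directly (no TF_NCCL_CONFIG_REPO
# indirection), which is why its implementation is
# _create_local_nccl_repository.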
remote_nccl_configure = repository_rule(
    implementation = _create_local_nccl_repository,
    environ = _ENVIRONS,
    remotable = True,
    attrs = {
        "environ": attr.string_dict(),
        "_find_cuda_config": attr.label(
            default = Label("@local_tsl//third_party/gpus:find_cuda_config.py"),
        ),
    },
)

nccl_configure = repository_rule(
    implementation = _nccl_autoconf_impl,
    environ = _ENVIRONS,
    attrs = {
        "_find_cuda_config": attr.label(
            default = Label("@local_tsl//third_party/gpus:find_cuda_config.py"),
        ),
    },
)
"""Detects and configures the NCCL configuration.
Add the following to your WORKSPACE FILE:
```python
nccl_configure(name = "local_config_nccl")
```
Args:
name: A unique name for this workspace rule.
"""