Remove some macros

Kaixi Hou 2021-03-12 15:08:43 -08:00
parent 558472bcda
commit 4e780fed91
7 changed files with 26 additions and 68 deletions
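The five convolution-kernel files below all change in the same way: every CudnnUseFrontend() call site had been wrapped in #if GOOGLE_CUDA / #endif, because the cuDNN-frontend (execution-plan) path only exists for CUDA builds with cuDNN 8.1+. This commit drops those guards at the call sites and instead makes CudnnUseFrontend() itself return false unless built with GOOGLE_CUDA and CUDNN_VERSION >= 8100 (see the use_cudnn.cc hunk at the bottom); the BUILD hunk adds a cudnn_header dependency for the new cudnn.h include. A minimal compilable sketch of the resulting shape, using stand-in names rather than the real TensorFlow kernel code:

// Sketch of the pattern after this commit (stand-in names, not the real
// kernels): call sites keep a plain runtime branch, and the compile-time
// condition lives inside CudnnUseFrontend() instead of in #if GOOGLE_CUDA
// guards at every call site.
#include <cstdio>

static bool CudnnUseFrontend() {
#if GOOGLE_CUDA && CUDNN_VERSION >= 8100
  // The real implementation additionally consults the TF_CUDNN_USE_FRONTEND
  // environment variable here.
  return true;
#else
  // Without CUDA and cuDNN 8.1+ the frontend path is never taken, so callers
  // no longer need their own preprocessor guards.
  return false;
#endif
}

static void ChooseAutotuneCache() {
  if (CudnnUseFrontend()) {
    std::puts("autotune with cuDNN frontend execution plans");
  } else {
    std::puts("autotune with legacy cuDNN algorithms");
  }
}

int main() { ChooseAutotuneCache(); }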

tensorflow/core/BUILD

@ -1655,7 +1655,9 @@ tf_cuda_library(
"//tensorflow/core/util:stats_calculator_portable",
"//tensorflow/core/util:tensor_format",
"//tensorflow/compiler/jit:common",
] + if_static(
] + if_cuda([
"@local_config_cuda//cuda:cudnn_header",
]) + if_static(
extra_deps = ["@com_google_protobuf//:protobuf"],
otherwise = ["@com_google_protobuf//:protobuf_headers"],
),

tensorflow/core/kernels/conv_grad_filter_ops.cc

@ -988,19 +988,17 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
#endif
AlgorithmConfig algorithm_config;
bool do_autotune;
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
do_autotune = cudnn_use_autotune &&
!AutoTuneConvBwdFilterExecutionPlan::GetInstance()->Find(
conv_parameters, &algorithm_config);
} else {
#endif
do_autotune = cudnn_use_autotune &&
!AutoTuneConvBwdFilter::GetInstance()->Find(
conv_parameters, &algorithm_config);
#if GOOGLE_CUDA
}
#if GOOGLE_CUDA
// The "cached_plans" is used to store the selected execution plans from
// autotuning to make them live long enough to the end of this op.
std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@ -1171,7 +1169,6 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
filter_backprop_ptr, out_backprop_ptr, input_desc,
filter_desc, output_desc, conv_desc,
stream->parent(), results);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
int idx, idx_no_scratch;
OP_REQUIRES_OK(ctx,
@ -1195,18 +1192,14 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
AutoTuneConvBwdFilterExecutionPlan::GetInstance()->Insert(conv_parameters,
cached_plans);
} else {
#endif
OP_REQUIRES_OK(ctx, BestCudnnConvAlgorithm(results, &algorithm_config));
AutoTuneConvBwdFilter::GetInstance()->Insert(conv_parameters,
algorithm_config);
#if GOOGLE_CUDA
}
#endif
}
Status cudnn_launch_status;
DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize, ctx);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
if (algorithm_config.algorithm().has_value()) {
VLOG(4) << "Conv2DBackpropFilter Execution Plan: "
@ -1219,14 +1212,11 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
filter_desc, &filter_backprop_ptr, &scratch_allocator, algorithm_config,
nullptr);
} else {
#endif
cudnn_launch_status = stream->ConvolveBackwardFilterWithAlgorithm(
input_desc, input_ptr, output_desc, out_backprop_ptr, conv_desc,
filter_desc, &filter_backprop_ptr, &scratch_allocator, algorithm_config,
nullptr);
#if GOOGLE_CUDA
}
#endif
if (!cudnn_launch_status.ok()) {
ctx->SetStatus(cudnn_launch_status);

tensorflow/core/kernels/conv_grad_input_ops.cc

@ -405,19 +405,17 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
#endif
AlgorithmConfig algorithm_config;
bool do_autotune;
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
do_autotune = cudnn_use_autotune &&
!AutoTuneConvBwdDataExecutionPlan::GetInstance()->Find(
conv_parameters, &algorithm_config);
} else {
#endif
do_autotune = cudnn_use_autotune &&
!AutoTuneConvBwdData::GetInstance()->Find(conv_parameters,
&algorithm_config);
#if GOOGLE_CUDA
}
#if GOOGLE_CUDA
// The "cached_plans" is used to store the selected execution plans from
// autotuning to make them live long enough to the end of this op.
std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@ -584,7 +582,6 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
se::dnn::ConvolutionKind::BACKWARD_DATA, se::dnn::ToDataType<T>::value,
in_backprop_ptr, filter_ptr, out_backprop_ptr, input_desc, filter_desc,
output_desc, conv_desc, stream->parent(), results);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
int idx, idx_no_scratch;
OP_REQUIRES_OK(ctx,
@ -607,17 +604,13 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
AutoTuneConvBwdDataExecutionPlan::GetInstance()->Insert(conv_parameters,
cached_plans);
} else {
#endif
OP_REQUIRES_OK(ctx, BestCudnnConvAlgorithm(results, &algorithm_config));
AutoTuneConvBwdData::GetInstance()->Insert(conv_parameters,
algorithm_config);
#if GOOGLE_CUDA
}
#endif
}
Status cudnn_launch_status;
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
if (algorithm_config.algorithm().has_value()) {
VLOG(4) << "Conv2DBackpropInput Execution Plan: "
@ -630,14 +623,11 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
input_desc, &in_backprop_ptr, &scratch_allocator, algorithm_config,
nullptr);
} else {
#endif
cudnn_launch_status = stream->ConvolveBackwardDataWithAlgorithm(
filter_desc, filter_ptr, output_desc, out_backprop_ptr, conv_desc,
input_desc, &in_backprop_ptr, &scratch_allocator, algorithm_config,
nullptr);
#if GOOGLE_CUDA
}
#endif
if (!cudnn_launch_status.ok()) {
ctx->SetStatus(cudnn_launch_status);

tensorflow/core/kernels/conv_grad_ops_3d.cc

@ -1428,19 +1428,17 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
#endif
AlgorithmConfig algorithm_config;
bool do_autotune;
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
do_autotune = cudnn_use_autotune_ &&
!AutoTuneConv3dBwdDataExecutionPlan::GetInstance()->Find(
conv_parameters, &algorithm_config);
} else {
#endif
do_autotune = cudnn_use_autotune_ &&
!AutoTuneConv3dBwdData::GetInstance()->Find(
conv_parameters, &algorithm_config);
#if GOOGLE_CUDA
}
#if GOOGLE_CUDA
// The "cached_plans" is used to store the selected execution plans from
// autotuning to make them live long enough to the end of this op.
std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@ -1590,7 +1588,6 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
filter_ptr, out_backprop_ptr, input_desc,
filter_desc, output_desc, conv_desc,
stream->parent(), results);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
int idx, idx_no_scratch;
OP_REQUIRES_OK(context,
@ -1614,20 +1611,16 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
AutoTuneConv3dBwdDataExecutionPlan::GetInstance()->Insert(
conv_parameters, cached_plans);
} else {
#endif
OP_REQUIRES_OK(context,
BestCudnnConvAlgorithm(results, &algorithm_config));
AutoTuneConv3dBwdData::GetInstance()->Insert(conv_parameters,
algorithm_config);
#if GOOGLE_CUDA
}
#endif
}
Status cudnn_launch_status;
DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,
context);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
if (algorithm_config.algorithm().has_value()) {
VLOG(4) << "Conv3DBackpropInput Execution Plan: "
@ -1640,14 +1633,11 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
input_desc, &in_backprop_ptr, &scratch_allocator, algorithm_config,
nullptr);
} else {
#endif
cudnn_launch_status = stream->ConvolveBackwardDataWithAlgorithm(
filter_desc, filter_ptr, output_desc, out_backprop_ptr, conv_desc,
input_desc, &in_backprop_ptr, &scratch_allocator, algorithm_config,
nullptr);
#if GOOGLE_CUDA
}
#endif
if (!cudnn_launch_status.ok()) {
context->SetStatus(cudnn_launch_status);
@ -2050,19 +2040,17 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
bool do_autotune;
AlgorithmConfig algorithm_config;
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
do_autotune = cudnn_use_autotune_ &&
!AutoTuneConv3dBwdFilterExecutionPlan::GetInstance()->Find(
conv_parameters, &algorithm_config);
} else {
#endif
do_autotune = cudnn_use_autotune_ &&
!AutoTuneConv3dBwdFilter::GetInstance()->Find(
conv_parameters, &algorithm_config);
#if GOOGLE_CUDA
}
#if GOOGLE_CUDA
// The "cached_plans" is used to store the selected execution plans from
// autotuning to make them live long enough to the end of this op.
std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@ -2192,7 +2180,6 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
filter_backprop_ptr, out_backprop_ptr, input_desc,
filter_desc, output_desc, conv_desc,
stream->parent(), results);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
int idx, idx_no_scratch;
OP_REQUIRES_OK(context,
@ -2216,7 +2203,6 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
AutoTuneConv3dBwdFilterExecutionPlan::GetInstance()->Insert(
conv_parameters, cached_plans);
} else {
#endif
Status s = BestCudnnConvAlgorithm(results, &algorithm_config);
#if GOOGLE_CUDA
if (s.code() == error::NOT_FOUND) {
@ -2237,15 +2223,12 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
OP_REQUIRES_OK(context, s);
AutoTuneConv3dBwdFilter::GetInstance()->Insert(conv_parameters,
algorithm_config);
#if GOOGLE_CUDA
}
#endif
}
Status cudnn_launch_status;
DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,
context);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
if (algorithm_config.algorithm().has_value()) {
VLOG(4) << "Conv3DBackpropFilter Execution Plan: "
@ -2258,14 +2241,11 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
filter_desc, &filter_backprop_ptr, &scratch_allocator,
algorithm_config, nullptr);
} else {
#endif
cudnn_launch_status = stream->ConvolveBackwardFilterWithAlgorithm(
input_desc, input_ptr, output_desc, out_backprop_ptr, conv_desc,
filter_desc, &filter_backprop_ptr, &scratch_allocator,
algorithm_config, nullptr);
#if GOOGLE_CUDA
}
#endif
if (!cudnn_launch_status.ok()) {
context->SetStatus(cudnn_launch_status);

tensorflow/core/kernels/conv_ops.cc

@ -996,18 +996,16 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
#endif
bool do_autotune;
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
do_autotune = cudnn_use_autotune &&
!AutoTuneConvExecutionPlan::GetInstance()->Find(conv_parameters,
&algorithm_config);
} else {
#endif
do_autotune = cudnn_use_autotune &&
!AutoTuneConv::GetInstance()->Find(conv_parameters, &algorithm_config);
#if GOOGLE_CUDA
}
#if GOOGLE_CUDA
// The "cached_plans" is used to store the selected execution plans from
// autotuning to make them live long enough to the end of this op.
std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@ -1176,7 +1174,6 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
output_tensor, input_desc, filter_desc, output_desc,
conv_desc, stream->parent(), results);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
int idx, idx_no_scratch;
OP_REQUIRES_OK(ctx,
@ -1200,17 +1197,13 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
AutoTuneConvExecutionPlan::GetInstance()->Insert(conv_parameters,
cached_plans);
} else {
#endif
OP_REQUIRES_OK(ctx, BestCudnnConvAlgorithm(results, &algorithm_config));
AutoTuneConv::GetInstance()->Insert(conv_parameters, algorithm_config);
#if GOOGLE_CUDA
}
#endif
}
Status cudnn_launch_status;
DnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
if (algorithm_config.algorithm().has_value()) {
VLOG(4) << "Conv2D Execution Plan: "
@ -1222,7 +1215,6 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
input_desc, input_ptr, filter_desc, filter_ptr, conv_desc, output_desc,
&output_ptr, &scratch_allocator, algorithm_config, nullptr);
} else {
#endif
VLOG(4) << "Convolution Algorithm: "
<< algorithm_config.algorithm()->algo_id();
VLOG(4) << "tensor_ops_enabled: "
@ -1231,9 +1223,7 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
cudnn_launch_status = stream->ConvolveWithAlgorithm(
input_desc, input_ptr, filter_desc, filter_ptr, conv_desc, output_desc,
&output_ptr, &scratch_allocator, algorithm_config, nullptr);
#if GOOGLE_CUDA
}
#endif
if (!cudnn_launch_status.ok()) {
ctx->SetStatus(cudnn_launch_status);

tensorflow/core/kernels/conv_ops_3d.cc

@ -510,19 +510,17 @@ struct LaunchConvOp<GPUDevice, T> {
AlgorithmConfig algorithm_config;
bool do_autotune;
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
do_autotune = cudnn_use_autotune &&
!AutoTuneConv3dExecutionPlan::GetInstance()->Find(
conv_parameters, &algorithm_config);
} else {
#endif
do_autotune = cudnn_use_autotune &&
!AutoTuneConv3d::GetInstance()->Find(
conv_parameters, &algorithm_config);
#if GOOGLE_CUDA
}
#if GOOGLE_CUDA
// The "cached_plans" is used to store the selected execution plans from
// autotuning to make them live long enough to the end of this op.
std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@ -689,7 +687,6 @@ struct LaunchConvOp<GPUDevice, T> {
se::dnn::ToDataType<T>::value, input_ptr,
filter_ptr, output_ptr, input_desc, filter_desc,
output_desc, conv_desc, stream->parent(), results);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
int idx, idx_no_scratch;
OP_REQUIRES_OK(ctx,
@ -713,18 +710,14 @@ struct LaunchConvOp<GPUDevice, T> {
AutoTuneConv3dExecutionPlan::GetInstance()->Insert(conv_parameters,
cached_plans);
} else {
#endif
OP_REQUIRES_OK(ctx, BestCudnnConvAlgorithm(results, &algorithm_config));
AutoTuneConv3d::GetInstance()->Insert(conv_parameters,
algorithm_config);
#if GOOGLE_CUDA
}
#endif
}
Status cudnn_launch_status;
DnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
if (algorithm_config.algorithm().has_value()) {
VLOG(4) << "Conv3D Execution Plan: "
@ -737,14 +730,11 @@ struct LaunchConvOp<GPUDevice, T> {
output_desc, &output_ptr, &scratch_allocator, algorithm_config,
nullptr);
} else {
#endif
cudnn_launch_status = stream->ConvolveWithAlgorithm(
input_desc, input_ptr, filter_desc, filter_ptr, conv_desc,
output_desc, &output_ptr, &scratch_allocator, algorithm_config,
nullptr);
#if GOOGLE_CUDA
}
#endif
if (!cudnn_launch_status.ok()) {
ctx->SetStatus(cudnn_launch_status);

tensorflow/core/util/use_cudnn.cc

@ -20,6 +20,10 @@ limitations under the License.
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/env_var.h"
#if GOOGLE_CUDA
#include "third_party/gpus/cudnn/cudnn.h"
#endif // GOOGLE_CUDA
namespace tensorflow {
#define ADD_BOOL_CUDNN_FLAG(func_name, flag_name, default_value) \
@ -32,8 +36,20 @@ namespace tensorflow {
return value; \
}
bool CudnnUseFrontend() {
#if GOOGLE_CUDA && CUDNN_VERSION >= 8100
bool value = false;
Status status = ReadBoolFromEnvVar("TF_CUDNN_USE_FRONTEND", false, &value);
if (!status.ok()) {
LOG(ERROR) << status;
}
return value;
#else
return false;
#endif // GOOGLE_CUDA && CUDNN_VERSION >= 8100
}
ADD_BOOL_CUDNN_FLAG(CudnnUseAutotune, TF_CUDNN_USE_AUTOTUNE, true);
ADD_BOOL_CUDNN_FLAG(CudnnUseFrontend, TF_CUDNN_USE_FRONTEND, false);
// Whether to auto-tuning Cudnn RNN forward and backward pass to pick
// statistically the best cudnnRNNAlgo_t and cudnnMathType_t.
// The flag is disabled when TF_DEBUG_CUDNN_RNN is turned on.
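For reference, the ADD_BOOL_CUDNN_FLAG(CudnnUseFrontend, ...) invocation removed in this hunk expanded to a plain environment-variable check; the explicit CudnnUseFrontend() added above keeps that body but wraps it in the GOOGLE_CUDA && CUDNN_VERSION >= 8100 check. A sketch of the old macro-generated function, reconstructed from the macro fragment shown above (not verbatim source), assuming the surrounding use_cudnn.cc context for Status, ReadBoolFromEnvVar, and LOG:

// Approximate expansion of
// ADD_BOOL_CUDNN_FLAG(CudnnUseFrontend, TF_CUDNN_USE_FRONTEND, false)
// before this commit; it matches the body of the new explicit
// CudnnUseFrontend() minus the version guard.
bool CudnnUseFrontend() {
  bool value = false;
  Status status = ReadBoolFromEnvVar("TF_CUDNN_USE_FRONTEND", false, &value);
  if (!status.ok()) {
    LOG(ERROR) << status;
  }
  return value;
}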