Remove some macros

Kaixi Hou 2021-03-12 15:08:43 -08:00
parent 558472bcda
commit 4e780fed91
7 changed files with 26 additions and 68 deletions
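The five convolution-kernel files below all change in the same way: every CudnnUseFrontend() call site had been wrapped in #if GOOGLE_CUDA / #endif, because the cuDNN-frontend (execution-plan) path only exists for CUDA builds with cuDNN 8.1+. This commit drops those guards at the call sites and instead makes CudnnUseFrontend() itself return false unless built with GOOGLE_CUDA and CUDNN_VERSION >= 8100 (see the use_cudnn.cc hunk at the bottom); the BUILD hunk adds a cudnn_header dependency for the new cudnn.h include. A minimal compilable sketch of the resulting shape, using stand-in names rather than the real TensorFlow kernel code:

// Sketch of the pattern after this commit (stand-in names, not the real
// kernels): call sites keep a plain runtime branch, and the compile-time
// condition lives inside CudnnUseFrontend() instead of in #if GOOGLE_CUDA
// guards at every call site.
#include <cstdio>

static bool CudnnUseFrontend() {
#if GOOGLE_CUDA && CUDNN_VERSION >= 8100
  // The real implementation additionally consults the TF_CUDNN_USE_FRONTEND
  // environment variable here.
  return true;
#else
  // Without CUDA and cuDNN 8.1+ the frontend path is never taken, so callers
  // no longer need their own preprocessor guards.
  return false;
#endif
}

static void ChooseAutotuneCache() {
  if (CudnnUseFrontend()) {
    std::puts("autotune with cuDNN frontend execution plans");
  } else {
    std::puts("autotune with legacy cuDNN algorithms");
  }
}

int main() { ChooseAutotuneCache(); }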

tensorflow/core/BUILD

@ -1655,7 +1655,9 @@ tf_cuda_library(
"//tensorflow/core/util:stats_calculator_portable",
"//tensorflow/core/util:tensor_format",
"//tensorflow/compiler/jit:common",
] + if_static(
] + if_cuda([
"@local_config_cuda//cuda:cudnn_header",
]) + if_static(
extra_deps = ["@com_google_protobuf//:protobuf"],
otherwise = ["@com_google_protobuf//:protobuf_headers"],
),

tensorflow/core/kernels/conv_grad_filter_ops.cc

@ -988,19 +988,17 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
#endif
AlgorithmConfig algorithm_config;
bool do_autotune;
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
do_autotune = cudnn_use_autotune &&
!AutoTuneConvBwdFilterExecutionPlan::GetInstance()->Find(
conv_parameters, &algorithm_config);
} else {
#endif
do_autotune = cudnn_use_autotune &&
!AutoTuneConvBwdFilter::GetInstance()->Find(
conv_parameters, &algorithm_config);
#if GOOGLE_CUDA
}
#if GOOGLE_CUDA
// The "cached_plans" is used to store the selected execution plans from
// autotuning to make them live long enough to the end of this op.
std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@ -1171,7 +1169,6 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
filter_backprop_ptr, out_backprop_ptr, input_desc,
filter_desc, output_desc, conv_desc,
stream->parent(), results);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
int idx, idx_no_scratch;
OP_REQUIRES_OK(ctx,
@ -1195,18 +1192,14 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
AutoTuneConvBwdFilterExecutionPlan::GetInstance()->Insert(conv_parameters,
cached_plans);
} else {
#endif
OP_REQUIRES_OK(ctx, BestCudnnConvAlgorithm(results, &algorithm_config));
AutoTuneConvBwdFilter::GetInstance()->Insert(conv_parameters,
algorithm_config);
#if GOOGLE_CUDA
}
#endif
}
Status cudnn_launch_status;
DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize, ctx);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
if (algorithm_config.algorithm().has_value()) {
VLOG(4) << "Conv2DBackpropFilter Execution Plan: "
@ -1219,14 +1212,11 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
filter_desc, &filter_backprop_ptr, &scratch_allocator, algorithm_config,
nullptr);
} else {
#endif
cudnn_launch_status = stream->ConvolveBackwardFilterWithAlgorithm(
input_desc, input_ptr, output_desc, out_backprop_ptr, conv_desc,
filter_desc, &filter_backprop_ptr, &scratch_allocator, algorithm_config,
nullptr);
#if GOOGLE_CUDA
}
#endif
if (!cudnn_launch_status.ok()) {
ctx->SetStatus(cudnn_launch_status);

tensorflow/core/kernels/conv_grad_input_ops.cc

@ -405,19 +405,17 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
#endif
AlgorithmConfig algorithm_config;
bool do_autotune;
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
do_autotune = cudnn_use_autotune &&
!AutoTuneConvBwdDataExecutionPlan::GetInstance()->Find(
conv_parameters, &algorithm_config);
} else {
#endif
do_autotune = cudnn_use_autotune &&
!AutoTuneConvBwdData::GetInstance()->Find(conv_parameters,
&algorithm_config);
#if GOOGLE_CUDA
}
#if GOOGLE_CUDA
// The "cached_plans" is used to store the selected execution plans from
// autotuning to make them live long enough to the end of this op.
std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@ -584,7 +582,6 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
se::dnn::ConvolutionKind::BACKWARD_DATA, se::dnn::ToDataType<T>::value,
in_backprop_ptr, filter_ptr, out_backprop_ptr, input_desc, filter_desc,
output_desc, conv_desc, stream->parent(), results);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
int idx, idx_no_scratch;
OP_REQUIRES_OK(ctx,
@ -607,17 +604,13 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
AutoTuneConvBwdDataExecutionPlan::GetInstance()->Insert(conv_parameters,
cached_plans);
} else {
#endif
OP_REQUIRES_OK(ctx, BestCudnnConvAlgorithm(results, &algorithm_config));
AutoTuneConvBwdData::GetInstance()->Insert(conv_parameters,
algorithm_config);
#if GOOGLE_CUDA
}
#endif
}
Status cudnn_launch_status;
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
if (algorithm_config.algorithm().has_value()) {
VLOG(4) << "Conv2DBackpropInput Execution Plan: "
@ -630,14 +623,11 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
input_desc, &in_backprop_ptr, &scratch_allocator, algorithm_config,
nullptr);
} else {
#endif
cudnn_launch_status = stream->ConvolveBackwardDataWithAlgorithm(
filter_desc, filter_ptr, output_desc, out_backprop_ptr, conv_desc,
input_desc, &in_backprop_ptr, &scratch_allocator, algorithm_config,
nullptr);
#if GOOGLE_CUDA
}
#endif
if (!cudnn_launch_status.ok()) {
ctx->SetStatus(cudnn_launch_status);

tensorflow/core/kernels/conv_grad_ops_3d.cc

@ -1428,19 +1428,17 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
#endif
AlgorithmConfig algorithm_config;
bool do_autotune;
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
do_autotune = cudnn_use_autotune_ &&
!AutoTuneConv3dBwdDataExecutionPlan::GetInstance()->Find(
conv_parameters, &algorithm_config);
} else {
#endif
do_autotune = cudnn_use_autotune_ &&
!AutoTuneConv3dBwdData::GetInstance()->Find(
conv_parameters, &algorithm_config);
#if GOOGLE_CUDA
}
#if GOOGLE_CUDA
// The "cached_plans" is used to store the selected execution plans from
// autotuning to make them live long enough to the end of this op.
std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@ -1590,7 +1588,6 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
filter_ptr, out_backprop_ptr, input_desc,
filter_desc, output_desc, conv_desc,
stream->parent(), results);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
int idx, idx_no_scratch;
OP_REQUIRES_OK(context,
@ -1614,20 +1611,16 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
AutoTuneConv3dBwdDataExecutionPlan::GetInstance()->Insert(
conv_parameters, cached_plans);
} else {
#endif
OP_REQUIRES_OK(context,
BestCudnnConvAlgorithm(results, &algorithm_config));
AutoTuneConv3dBwdData::GetInstance()->Insert(conv_parameters,
algorithm_config);
#if GOOGLE_CUDA
}
#endif
}
Status cudnn_launch_status;
DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,
context);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
if (algorithm_config.algorithm().has_value()) {
VLOG(4) << "Conv3DBackpropInput Execution Plan: "
@ -1640,14 +1633,11 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
input_desc, &in_backprop_ptr, &scratch_allocator, algorithm_config,
nullptr);
} else {
#endif
cudnn_launch_status = stream->ConvolveBackwardDataWithAlgorithm(
filter_desc, filter_ptr, output_desc, out_backprop_ptr, conv_desc,
input_desc, &in_backprop_ptr, &scratch_allocator, algorithm_config,
nullptr);
#if GOOGLE_CUDA
}
#endif
if (!cudnn_launch_status.ok()) {
context->SetStatus(cudnn_launch_status);
@ -2050,19 +2040,17 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
bool do_autotune;
AlgorithmConfig algorithm_config;
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
do_autotune = cudnn_use_autotune_ &&
!AutoTuneConv3dBwdFilterExecutionPlan::GetInstance()->Find(
conv_parameters, &algorithm_config);
} else {
#endif
do_autotune = cudnn_use_autotune_ &&
!AutoTuneConv3dBwdFilter::GetInstance()->Find(
conv_parameters, &algorithm_config);
#if GOOGLE_CUDA
}
#if GOOGLE_CUDA
// The "cached_plans" is used to store the selected execution plans from
// autotuning to make them live long enough to the end of this op.
std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@ -2192,7 +2180,6 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
filter_backprop_ptr, out_backprop_ptr, input_desc,
filter_desc, output_desc, conv_desc,
stream->parent(), results);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
int idx, idx_no_scratch;
OP_REQUIRES_OK(context,
@ -2216,7 +2203,6 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
AutoTuneConv3dBwdFilterExecutionPlan::GetInstance()->Insert(
conv_parameters, cached_plans);
} else {
#endif
Status s = BestCudnnConvAlgorithm(results, &algorithm_config);
#if GOOGLE_CUDA
if (s.code() == error::NOT_FOUND) {
@ -2237,15 +2223,12 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
OP_REQUIRES_OK(context, s);
AutoTuneConv3dBwdFilter::GetInstance()->Insert(conv_parameters,
algorithm_config);
#if GOOGLE_CUDA
}
#endif
}
Status cudnn_launch_status;
DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,
context);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
if (algorithm_config.algorithm().has_value()) {
VLOG(4) << "Conv3DBackpropFilter Execution Plan: "
@ -2258,14 +2241,11 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
filter_desc, &filter_backprop_ptr, &scratch_allocator,
algorithm_config, nullptr);
} else {
#endif
cudnn_launch_status = stream->ConvolveBackwardFilterWithAlgorithm(
input_desc, input_ptr, output_desc, out_backprop_ptr, conv_desc,
filter_desc, &filter_backprop_ptr, &scratch_allocator,
algorithm_config, nullptr);
#if GOOGLE_CUDA
}
#endif
if (!cudnn_launch_status.ok()) {
context->SetStatus(cudnn_launch_status);

tensorflow/core/kernels/conv_ops.cc

@ -996,18 +996,16 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
#endif
bool do_autotune;
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
do_autotune = cudnn_use_autotune &&
!AutoTuneConvExecutionPlan::GetInstance()->Find(conv_parameters,
&algorithm_config);
} else {
#endif
do_autotune = cudnn_use_autotune &&
!AutoTuneConv::GetInstance()->Find(conv_parameters, &algorithm_config);
#if GOOGLE_CUDA
}
#if GOOGLE_CUDA
// The "cached_plans" is used to store the selected execution plans from
// autotuning to make them live long enough to the end of this op.
std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@ -1176,7 +1174,6 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
output_tensor, input_desc, filter_desc, output_desc,
conv_desc, stream->parent(), results);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
int idx, idx_no_scratch;
OP_REQUIRES_OK(ctx,
@ -1200,17 +1197,13 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
AutoTuneConvExecutionPlan::GetInstance()->Insert(conv_parameters,
cached_plans);
} else {
#endif
OP_REQUIRES_OK(ctx, BestCudnnConvAlgorithm(results, &algorithm_config));
AutoTuneConv::GetInstance()->Insert(conv_parameters, algorithm_config);
#if GOOGLE_CUDA
}
#endif
}
Status cudnn_launch_status;
DnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
if (algorithm_config.algorithm().has_value()) {
VLOG(4) << "Conv2D Execution Plan: "
@ -1222,7 +1215,6 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
input_desc, input_ptr, filter_desc, filter_ptr, conv_desc, output_desc,
&output_ptr, &scratch_allocator, algorithm_config, nullptr);
} else {
#endif
VLOG(4) << "Convolution Algorithm: "
<< algorithm_config.algorithm()->algo_id();
VLOG(4) << "tensor_ops_enabled: "
@ -1231,9 +1223,7 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
cudnn_launch_status = stream->ConvolveWithAlgorithm(
input_desc, input_ptr, filter_desc, filter_ptr, conv_desc, output_desc,
&output_ptr, &scratch_allocator, algorithm_config, nullptr);
#if GOOGLE_CUDA
}
#endif
if (!cudnn_launch_status.ok()) {
ctx->SetStatus(cudnn_launch_status);

tensorflow/core/kernels/conv_ops_3d.cc

@ -510,19 +510,17 @@ struct LaunchConvOp<GPUDevice, T> {
AlgorithmConfig algorithm_config;
bool do_autotune;
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
do_autotune = cudnn_use_autotune &&
!AutoTuneConv3dExecutionPlan::GetInstance()->Find(
conv_parameters, &algorithm_config);
} else {
#endif
do_autotune = cudnn_use_autotune &&
!AutoTuneConv3d::GetInstance()->Find(
conv_parameters, &algorithm_config);
#if GOOGLE_CUDA
}
#if GOOGLE_CUDA
// The "cached_plans" is used to store the selected execution plans from
// autotuning to make them live long enough to the end of this op.
std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@ -689,7 +687,6 @@ struct LaunchConvOp<GPUDevice, T> {
se::dnn::ToDataType<T>::value, input_ptr,
filter_ptr, output_ptr, input_desc, filter_desc,
output_desc, conv_desc, stream->parent(), results);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
int idx, idx_no_scratch;
OP_REQUIRES_OK(ctx,
@ -713,18 +710,14 @@ struct LaunchConvOp<GPUDevice, T> {
AutoTuneConv3dExecutionPlan::GetInstance()->Insert(conv_parameters,
cached_plans);
} else {
#endif
OP_REQUIRES_OK(ctx, BestCudnnConvAlgorithm(results, &algorithm_config));
AutoTuneConv3d::GetInstance()->Insert(conv_parameters,
algorithm_config);
#if GOOGLE_CUDA
}
#endif
}
Status cudnn_launch_status;
DnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
#if GOOGLE_CUDA
if (CudnnUseFrontend()) {
if (algorithm_config.algorithm().has_value()) {
VLOG(4) << "Conv3D Execution Plan: "
@ -737,14 +730,11 @@ struct LaunchConvOp<GPUDevice, T> {
output_desc, &output_ptr, &scratch_allocator, algorithm_config,
nullptr);
} else {
#endif
cudnn_launch_status = stream->ConvolveWithAlgorithm(
input_desc, input_ptr, filter_desc, filter_ptr, conv_desc,
output_desc, &output_ptr, &scratch_allocator, algorithm_config,
nullptr);
#if GOOGLE_CUDA
}
#endif
if (!cudnn_launch_status.ok()) {
ctx->SetStatus(cudnn_launch_status);

tensorflow/core/util/use_cudnn.cc

@ -20,6 +20,10 @@ limitations under the License.
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/env_var.h"
#if GOOGLE_CUDA
#include "third_party/gpus/cudnn/cudnn.h"
#endif // GOOGLE_CUDA
namespace tensorflow {
#define ADD_BOOL_CUDNN_FLAG(func_name, flag_name, default_value) \
@ -32,8 +36,20 @@ namespace tensorflow {
return value; \
}
bool CudnnUseFrontend() {
#if GOOGLE_CUDA && CUDNN_VERSION >= 8100
bool value = false;
Status status = ReadBoolFromEnvVar("TF_CUDNN_USE_FRONTEND", false, &value);
if (!status.ok()) {
LOG(ERROR) << status;
}
return value;
#else
return false;
#endif // GOOGLE_CUDA && CUDNN_VERSION >= 8100
}
ADD_BOOL_CUDNN_FLAG(CudnnUseAutotune, TF_CUDNN_USE_AUTOTUNE, true);
ADD_BOOL_CUDNN_FLAG(CudnnUseFrontend, TF_CUDNN_USE_FRONTEND, false);
// Whether to auto-tuning Cudnn RNN forward and backward pass to pick
// statistically the best cudnnRNNAlgo_t and cudnnMathType_t.
// The flag is disabled when TF_DEBUG_CUDNN_RNN is turned on.
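For reference, the ADD_BOOL_CUDNN_FLAG(CudnnUseFrontend, ...) invocation removed in this hunk expanded to a plain environment-variable check; the explicit CudnnUseFrontend() added above keeps that body but wraps it in the GOOGLE_CUDA && CUDNN_VERSION >= 8100 check. A sketch of the old macro-generated function, reconstructed from the macro fragment shown above (not verbatim source), assuming the surrounding use_cudnn.cc context for Status, ReadBoolFromEnvVar, and LOG:

// Approximate expansion of
// ADD_BOOL_CUDNN_FLAG(CudnnUseFrontend, TF_CUDNN_USE_FRONTEND, false)
// before this commit; it matches the body of the new explicit
// CudnnUseFrontend() minus the version guard.
bool CudnnUseFrontend() {
  bool value = false;
  Status status = ReadBoolFromEnvVar("TF_CUDNN_USE_FRONTEND", false, &value);
  if (!status.ok()) {
    LOG(ERROR) << status;
  }
  return value;
}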