mirror of https://github.com/tensorflow/tensorflow.git

Remove some macros

parent 558472bcda
commit 4e780fed91
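The hunks below touch the GPU launchers for 2-D and 3-D convolution (forward, backprop-to-input, and backprop-to-filter), one tf_cuda_library BUILD rule, and the cuDNN flag helpers. Judging from the final hunk, CudnnUseFrontend() stops being generated by the ADD_BOOL_CUDNN_FLAG macro and becomes an ordinary function that is compiled on every platform and returns false unless built with CUDA and cuDNN 8.1 or newer; the shrinking line counts in the kernel hunks are consistent with #if GOOGLE_CUDA guards being dropped around its call sites.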
@@ -1655,7 +1655,9 @@ tf_cuda_library(
        "//tensorflow/core/util:stats_calculator_portable",
        "//tensorflow/core/util:tensor_format",
        "//tensorflow/compiler/jit:common",
    ] + if_static(
    ] + if_cuda([
        "@local_config_cuda//cuda:cudnn_header",
    ]) + if_static(
        extra_deps = ["@com_google_protobuf//:protobuf"],
        otherwise = ["@com_google_protobuf//:protobuf_headers"],
    ),
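This BUILD hunk makes the cuDNN header dependency conditional: "@local_config_cuda//cuda:cudnn_header" is pulled in only under if_cuda(), which is presumably what lets the last hunk below check CUDNN_VERSION at compile time without breaking non-CUDA builds.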
@@ -988,19 +988,17 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
#endif
  AlgorithmConfig algorithm_config;
  bool do_autotune;
#if GOOGLE_CUDA
  if (CudnnUseFrontend()) {
    do_autotune = cudnn_use_autotune &&
                  !AutoTuneConvBwdFilterExecutionPlan::GetInstance()->Find(
                      conv_parameters, &algorithm_config);
  } else {
#endif
    do_autotune = cudnn_use_autotune &&
                  !AutoTuneConvBwdFilter::GetInstance()->Find(
                      conv_parameters, &algorithm_config);
#if GOOGLE_CUDA
  }

#if GOOGLE_CUDA
  // The "cached_plans" is used to store the selected execution plans from
  // autotuning to make them live long enough to the end of this op.
  std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@@ -1171,7 +1169,6 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
        filter_backprop_ptr, out_backprop_ptr, input_desc,
        filter_desc, output_desc, conv_desc,
        stream->parent(), results);
#if GOOGLE_CUDA
    if (CudnnUseFrontend()) {
      int idx, idx_no_scratch;
      OP_REQUIRES_OK(ctx,
@@ -1195,18 +1192,14 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
      AutoTuneConvBwdFilterExecutionPlan::GetInstance()->Insert(conv_parameters,
                                                                cached_plans);
    } else {
#endif
      OP_REQUIRES_OK(ctx, BestCudnnConvAlgorithm(results, &algorithm_config));
      AutoTuneConvBwdFilter::GetInstance()->Insert(conv_parameters,
                                                   algorithm_config);
#if GOOGLE_CUDA
    }
#endif
  }

  Status cudnn_launch_status;
  DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize, ctx);
#if GOOGLE_CUDA
  if (CudnnUseFrontend()) {
    if (algorithm_config.algorithm().has_value()) {
      VLOG(4) << "Conv2DBackpropFilter Execution Plan: "
@@ -1219,14 +1212,11 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
        filter_desc, &filter_backprop_ptr, &scratch_allocator, algorithm_config,
        nullptr);
  } else {
#endif
    cudnn_launch_status = stream->ConvolveBackwardFilterWithAlgorithm(
        input_desc, input_ptr, output_desc, out_backprop_ptr, conv_desc,
        filter_desc, &filter_backprop_ptr, &scratch_allocator, algorithm_config,
        nullptr);
#if GOOGLE_CUDA
  }
#endif

  if (!cudnn_launch_status.ok()) {
    ctx->SetStatus(cudnn_launch_status);
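Every launcher in this commit follows the same autotune-cache protocol: a process-wide singleton keyed by the convolution's parameters, queried with Find() before autotuning and filled with Insert() afterwards. The following is a minimal self-contained sketch of that protocol; AutotuneCache, ConvParams, and AlgorithmChoice are illustrative stand-ins, not TensorFlow's actual AutotuneMap, ConvParameters, or AlgorithmConfig types.

#include <map>
#include <mutex>
#include <tuple>

// Key describing one convolution configuration. The real ConvParameters
// also covers dtypes, strides, dilations, and the device id.
struct ConvParams {
  int batch, in_depth, out_depth, filter_rows, filter_cols;
  bool operator<(const ConvParams& o) const {
    return std::tie(batch, in_depth, out_depth, filter_rows, filter_cols) <
           std::tie(o.batch, o.in_depth, o.out_depth, o.filter_rows,
                    o.filter_cols);
  }
};

// Stand-in for the cached result (cf. AlgorithmConfig in the hunks).
struct AlgorithmChoice {
  int algo_id = -1;
};

// Process-wide cache: Find() reports whether this configuration was
// autotuned before; Insert() records a freshly autotuned winner.
class AutotuneCache {
 public:
  static AutotuneCache* GetInstance() {
    static AutotuneCache* instance = new AutotuneCache;
    return instance;
  }
  bool Find(const ConvParams& params, AlgorithmChoice* out) {
    std::lock_guard<std::mutex> lock(mu_);
    auto it = cache_.find(params);
    if (it == cache_.end()) return false;
    *out = it->second;
    return true;
  }
  void Insert(const ConvParams& params, const AlgorithmChoice& choice) {
    std::lock_guard<std::mutex> lock(mu_);
    cache_[params] = choice;
  }

 private:
  std::mutex mu_;
  std::map<ConvParams, AlgorithmChoice> cache_;
};

Mirroring the hunks, a caller autotunes only on a cache miss (use_autotune and params are assumed to be in scope):

  AlgorithmChoice choice;
  bool do_autotune =
      use_autotune && !AutotuneCache::GetInstance()->Find(params, &choice);
  if (do_autotune) {
    // ... profile candidate algorithms, fill `choice` with the winner ...
    AutotuneCache::GetInstance()->Insert(params, choice);
  }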
@@ -405,19 +405,17 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
#endif
  AlgorithmConfig algorithm_config;
  bool do_autotune;
#if GOOGLE_CUDA
  if (CudnnUseFrontend()) {
    do_autotune = cudnn_use_autotune &&
                  !AutoTuneConvBwdDataExecutionPlan::GetInstance()->Find(
                      conv_parameters, &algorithm_config);
  } else {
#endif
    do_autotune = cudnn_use_autotune &&
                  !AutoTuneConvBwdData::GetInstance()->Find(conv_parameters,
                                                            &algorithm_config);
#if GOOGLE_CUDA
  }

#if GOOGLE_CUDA
  // The "cached_plans" is used to store the selected execution plans from
  // autotuning to make them live long enough to the end of this op.
  std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@@ -584,7 +582,6 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
        se::dnn::ConvolutionKind::BACKWARD_DATA, se::dnn::ToDataType<T>::value,
        in_backprop_ptr, filter_ptr, out_backprop_ptr, input_desc, filter_desc,
        output_desc, conv_desc, stream->parent(), results);
#if GOOGLE_CUDA
    if (CudnnUseFrontend()) {
      int idx, idx_no_scratch;
      OP_REQUIRES_OK(ctx,
@@ -607,17 +604,13 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
      AutoTuneConvBwdDataExecutionPlan::GetInstance()->Insert(conv_parameters,
                                                              cached_plans);
    } else {
#endif
      OP_REQUIRES_OK(ctx, BestCudnnConvAlgorithm(results, &algorithm_config));
      AutoTuneConvBwdData::GetInstance()->Insert(conv_parameters,
                                                 algorithm_config);
#if GOOGLE_CUDA
    }
#endif
  }

  Status cudnn_launch_status;
#if GOOGLE_CUDA
  if (CudnnUseFrontend()) {
    if (algorithm_config.algorithm().has_value()) {
      VLOG(4) << "Conv2DBackpropInput Execution Plan: "
@@ -630,14 +623,11 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
        input_desc, &in_backprop_ptr, &scratch_allocator, algorithm_config,
        nullptr);
  } else {
#endif
    cudnn_launch_status = stream->ConvolveBackwardDataWithAlgorithm(
        filter_desc, filter_ptr, output_desc, out_backprop_ptr, conv_desc,
        input_desc, &in_backprop_ptr, &scratch_allocator, algorithm_config,
        nullptr);
#if GOOGLE_CUDA
  }
#endif

  if (!cudnn_launch_status.ok()) {
    ctx->SetStatus(cudnn_launch_status);
@@ -1428,19 +1428,17 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
#endif
    AlgorithmConfig algorithm_config;
    bool do_autotune;
#if GOOGLE_CUDA
    if (CudnnUseFrontend()) {
      do_autotune = cudnn_use_autotune_ &&
                    !AutoTuneConv3dBwdDataExecutionPlan::GetInstance()->Find(
                        conv_parameters, &algorithm_config);
    } else {
#endif
      do_autotune = cudnn_use_autotune_ &&
                    !AutoTuneConv3dBwdData::GetInstance()->Find(
                        conv_parameters, &algorithm_config);
#if GOOGLE_CUDA
    }

#if GOOGLE_CUDA
    // The "cached_plans" is used to store the selected execution plans from
    // autotuning to make them live long enough to the end of this op.
    std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@@ -1590,7 +1588,6 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
          filter_ptr, out_backprop_ptr, input_desc,
          filter_desc, output_desc, conv_desc,
          stream->parent(), results);
#if GOOGLE_CUDA
      if (CudnnUseFrontend()) {
        int idx, idx_no_scratch;
        OP_REQUIRES_OK(context,
@@ -1614,20 +1611,16 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
        AutoTuneConv3dBwdDataExecutionPlan::GetInstance()->Insert(
            conv_parameters, cached_plans);
      } else {
#endif
        OP_REQUIRES_OK(context,
                       BestCudnnConvAlgorithm(results, &algorithm_config));
        AutoTuneConv3dBwdData::GetInstance()->Insert(conv_parameters,
                                                     algorithm_config);
#if GOOGLE_CUDA
      }
#endif
    }

    Status cudnn_launch_status;
    DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,
                                          context);
#if GOOGLE_CUDA
    if (CudnnUseFrontend()) {
      if (algorithm_config.algorithm().has_value()) {
        VLOG(4) << "Conv3DBackpropInput Execution Plan: "
@@ -1640,14 +1633,11 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
          input_desc, &in_backprop_ptr, &scratch_allocator, algorithm_config,
          nullptr);
    } else {
#endif
      cudnn_launch_status = stream->ConvolveBackwardDataWithAlgorithm(
          filter_desc, filter_ptr, output_desc, out_backprop_ptr, conv_desc,
          input_desc, &in_backprop_ptr, &scratch_allocator, algorithm_config,
          nullptr);
#if GOOGLE_CUDA
    }
#endif

    if (!cudnn_launch_status.ok()) {
      context->SetStatus(cudnn_launch_status);
@@ -2050,19 +2040,17 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {

    bool do_autotune;
    AlgorithmConfig algorithm_config;
#if GOOGLE_CUDA
    if (CudnnUseFrontend()) {
      do_autotune = cudnn_use_autotune_ &&
                    !AutoTuneConv3dBwdFilterExecutionPlan::GetInstance()->Find(
                        conv_parameters, &algorithm_config);
    } else {
#endif
      do_autotune = cudnn_use_autotune_ &&
                    !AutoTuneConv3dBwdFilter::GetInstance()->Find(
                        conv_parameters, &algorithm_config);
#if GOOGLE_CUDA
    }

#if GOOGLE_CUDA
    // The "cached_plans" is used to store the selected execution plans from
    // autotuning to make them live long enough to the end of this op.
    std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@@ -2192,7 +2180,6 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
          filter_backprop_ptr, out_backprop_ptr, input_desc,
          filter_desc, output_desc, conv_desc,
          stream->parent(), results);
#if GOOGLE_CUDA
      if (CudnnUseFrontend()) {
        int idx, idx_no_scratch;
        OP_REQUIRES_OK(context,
@@ -2216,7 +2203,6 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
        AutoTuneConv3dBwdFilterExecutionPlan::GetInstance()->Insert(
            conv_parameters, cached_plans);
      } else {
#endif
        Status s = BestCudnnConvAlgorithm(results, &algorithm_config);
#if GOOGLE_CUDA
        if (s.code() == error::NOT_FOUND) {
@@ -2237,15 +2223,12 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
        OP_REQUIRES_OK(context, s);
        AutoTuneConv3dBwdFilter::GetInstance()->Insert(conv_parameters,
                                                       algorithm_config);
#if GOOGLE_CUDA
      }
#endif
    }

    Status cudnn_launch_status;
    DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,
                                          context);
#if GOOGLE_CUDA
    if (CudnnUseFrontend()) {
      if (algorithm_config.algorithm().has_value()) {
        VLOG(4) << "Conv3DBackpropFilter Execution Plan: "
@@ -2258,14 +2241,11 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
          filter_desc, &filter_backprop_ptr, &scratch_allocator,
          algorithm_config, nullptr);
    } else {
#endif
      cudnn_launch_status = stream->ConvolveBackwardFilterWithAlgorithm(
          input_desc, input_ptr, output_desc, out_backprop_ptr, conv_desc,
          filter_desc, &filter_backprop_ptr, &scratch_allocator,
          algorithm_config, nullptr);
#if GOOGLE_CUDA
    }
#endif

    if (!cudnn_launch_status.ok()) {
      context->SetStatus(cudnn_launch_status);
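Several hunks carry the comment that cached_plans keeps the selected execution plans alive until the end of the op. The mechanism is ordinary unique_ptr ownership in a vector; here is a minimal runnable sketch, with an illustrative ExecutionPlan type standing in for se::dnn::ConvolveExecutionPlan.

#include <memory>
#include <vector>

struct ExecutionPlan {  // stand-in for se::dnn::ConvolveExecutionPlan
  int id;
};

int main() {
  // Candidates built during autotuning are parked here so the winning
  // plan outlives algorithm selection and stays valid for the actual
  // convolution launch (and for insertion into the plan cache).
  std::vector<std::unique_ptr<ExecutionPlan>> cached_plans;
  for (int id = 0; id < 3; ++id) {
    cached_plans.push_back(std::make_unique<ExecutionPlan>(ExecutionPlan{id}));
  }
  // Selecting index 1 (say, the autotune winner) is safe: the vector owns
  // the object for the remainder of the op's scope.
  ExecutionPlan* best = cached_plans[1].get();
  (void)best;
  return 0;
}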
@@ -996,18 +996,16 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
#endif

  bool do_autotune;
#if GOOGLE_CUDA
  if (CudnnUseFrontend()) {
    do_autotune = cudnn_use_autotune &&
                  !AutoTuneConvExecutionPlan::GetInstance()->Find(conv_parameters,
                                                                  &algorithm_config);
  } else {
#endif
    do_autotune = cudnn_use_autotune &&
        !AutoTuneConv::GetInstance()->Find(conv_parameters, &algorithm_config);
#if GOOGLE_CUDA
  }

#if GOOGLE_CUDA
  // The "cached_plans" is used to store the selected execution plans from
  // autotuning to make them live long enough to the end of this op.
  std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@@ -1176,7 +1174,6 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
        output_tensor, input_desc, filter_desc, output_desc,
        conv_desc, stream->parent(), results);

#if GOOGLE_CUDA
    if (CudnnUseFrontend()) {
      int idx, idx_no_scratch;
      OP_REQUIRES_OK(ctx,
@@ -1200,17 +1197,13 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
      AutoTuneConvExecutionPlan::GetInstance()->Insert(conv_parameters,
                                                       cached_plans);
    } else {
#endif
      OP_REQUIRES_OK(ctx, BestCudnnConvAlgorithm(results, &algorithm_config));
      AutoTuneConv::GetInstance()->Insert(conv_parameters, algorithm_config);
#if GOOGLE_CUDA
    }
#endif
  }

  Status cudnn_launch_status;
  DnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
#if GOOGLE_CUDA
  if (CudnnUseFrontend()) {
    if (algorithm_config.algorithm().has_value()) {
      VLOG(4) << "Conv2D Execution Plan: "
@@ -1222,7 +1215,6 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
        input_desc, input_ptr, filter_desc, filter_ptr, conv_desc, output_desc,
        &output_ptr, &scratch_allocator, algorithm_config, nullptr);
  } else {
#endif
    VLOG(4) << "Convolution Algorithm: "
            << algorithm_config.algorithm()->algo_id();
    VLOG(4) << "tensor_ops_enabled: "
@@ -1231,9 +1223,7 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
    cudnn_launch_status = stream->ConvolveWithAlgorithm(
        input_desc, input_ptr, filter_desc, filter_ptr, conv_desc, output_desc,
        &output_ptr, &scratch_allocator, algorithm_config, nullptr);
#if GOOGLE_CUDA
  }
#endif

  if (!cudnn_launch_status.ok()) {
    ctx->SetStatus(cudnn_launch_status);
@@ -510,19 +510,17 @@ struct LaunchConvOp<GPUDevice, T> {
    AlgorithmConfig algorithm_config;

    bool do_autotune;
#if GOOGLE_CUDA
    if (CudnnUseFrontend()) {
      do_autotune = cudnn_use_autotune &&
                    !AutoTuneConv3dExecutionPlan::GetInstance()->Find(
                        conv_parameters, &algorithm_config);
    } else {
#endif
      do_autotune = cudnn_use_autotune &&
                    !AutoTuneConv3d::GetInstance()->Find(
                        conv_parameters, &algorithm_config);
#if GOOGLE_CUDA
    }

#if GOOGLE_CUDA
    // The "cached_plans" is used to store the selected execution plans from
    // autotuning to make them live long enough to the end of this op.
    std::vector<std::unique_ptr<se::dnn::ConvolveExecutionPlan>> cached_plans;
@@ -689,7 +687,6 @@ struct LaunchConvOp<GPUDevice, T> {
          se::dnn::ToDataType<T>::value, input_ptr,
          filter_ptr, output_ptr, input_desc, filter_desc,
          output_desc, conv_desc, stream->parent(), results);
#if GOOGLE_CUDA
      if (CudnnUseFrontend()) {
        int idx, idx_no_scratch;
        OP_REQUIRES_OK(ctx,
@@ -713,18 +710,14 @@ struct LaunchConvOp<GPUDevice, T> {
        AutoTuneConv3dExecutionPlan::GetInstance()->Insert(conv_parameters,
                                                           cached_plans);
      } else {
#endif
        OP_REQUIRES_OK(ctx, BestCudnnConvAlgorithm(results, &algorithm_config));
        AutoTuneConv3d::GetInstance()->Insert(conv_parameters,
                                              algorithm_config);
#if GOOGLE_CUDA
      }
#endif
    }

    Status cudnn_launch_status;
    DnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
#if GOOGLE_CUDA
    if (CudnnUseFrontend()) {
      if (algorithm_config.algorithm().has_value()) {
        VLOG(4) << "Conv3D Execution Plan: "
@@ -737,14 +730,11 @@ struct LaunchConvOp<GPUDevice, T> {
          output_desc, &output_ptr, &scratch_allocator, algorithm_config,
          nullptr);
    } else {
#endif
      cudnn_launch_status = stream->ConvolveWithAlgorithm(
          input_desc, input_ptr, filter_desc, filter_ptr, conv_desc,
          output_desc, &output_ptr, &scratch_allocator, algorithm_config,
          nullptr);
#if GOOGLE_CUDA
    }
#endif

    if (!cudnn_launch_status.ok()) {
      ctx->SetStatus(cudnn_launch_status);
@@ -20,6 +20,10 @@ limitations under the License.
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/env_var.h"

#if GOOGLE_CUDA
#include "third_party/gpus/cudnn/cudnn.h"
#endif  // GOOGLE_CUDA

namespace tensorflow {

#define ADD_BOOL_CUDNN_FLAG(func_name, flag_name, default_value) \
@@ -32,8 +36,20 @@ namespace tensorflow {
    return value;                                                 \
  }

bool CudnnUseFrontend() {
#if GOOGLE_CUDA && CUDNN_VERSION >= 8100
  bool value = false;
  Status status = ReadBoolFromEnvVar("TF_CUDNN_USE_FRONTEND", false, &value);
  if (!status.ok()) {
    LOG(ERROR) << status;
  }
  return value;
#else
  return false;
#endif  // GOOGLE_CUDA && CUDNN_VERSION >= 8100
}

ADD_BOOL_CUDNN_FLAG(CudnnUseAutotune, TF_CUDNN_USE_AUTOTUNE, true);
ADD_BOOL_CUDNN_FLAG(CudnnUseFrontend, TF_CUDNN_USE_FRONTEND, false);
// Whether to auto-tuning Cudnn RNN forward and backward pass to pick
// statistically the best cudnnRNNAlgo_t and cudnnMathType_t.
// The flag is disabled when TF_DEBUG_CUDNN_RNN is turned on.
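For context, ADD_BOOL_CUDNN_FLAG (whose tail is visible in the hunk above) defines one env-var-backed boolean accessor per invocation; this commit apparently replaces the macro-generated CudnnUseFrontend with the hand-written, version-guarded function shown above, so the symbol exists even in builds where cuDNN is absent. Below is a self-contained approximation of the macro's pattern; ReadBoolEnv is a simplified stand-in for tensorflow::ReadBoolFromEnvVar (the real helper lives in tensorflow/core/util/env_var.h and returns a Status).

#include <cstdlib>
#include <cstring>
#include <iostream>

// Simplified stand-in for tensorflow::ReadBoolFromEnvVar: unset means
// default_value; "0"/"false" mean false; anything else means true.
static bool ReadBoolEnv(const char* name, bool default_value) {
  const char* raw = std::getenv(name);
  if (raw == nullptr) return default_value;
  return std::strcmp(raw, "0") != 0 && std::strcmp(raw, "false") != 0;
}

// Approximation of ADD_BOOL_CUDNN_FLAG: each invocation defines a
// bool-returning accessor backed by one environment variable.
#define ADD_BOOL_CUDNN_FLAG(func_name, flag_name, default_value) \
  bool func_name() { return ReadBoolEnv(#flag_name, default_value); }

ADD_BOOL_CUDNN_FLAG(CudnnUseAutotune, TF_CUDNN_USE_AUTOTUNE, true);

int main() {
  // Prints 1 unless TF_CUDNN_USE_AUTOTUNE is set to "0" or "false".
  std::cout << "CudnnUseAutotune() = " << CudnnUseAutotune() << "\n";
  return 0;
}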