Update on "torch.sgn for complex tensors" · pytorch/pytorch@e154c5d · GitHub
[go: up one dir, main page]

Skip to content

Commit e154c5d

Browse files
committed
Update on "torch.sgn for complex tensors"
resolves #36323 by adding `torch.sgn` for complex tensors. `torch.sgn` returns `x/abs(x)` for `x != 0` and returns `0 + 0j` for `x == 0`. This PR doesn't test the correctness of the gradients. It will be done as a part of auditing all the ops in the future once we decide the autograd behavior (JAX vs TF) and add gradcheck. Differential Revision: [D23460526](https://our.internmc.facebook.com/intern/diff/D23460526) [ghstack-poisoned]
2 parents bd99fc9 + 5973b44 commit e154c5d

File tree

208 files changed

+8482
-5433
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

208 files changed

+8482
-5433
lines changed

.circleci/cimodel/data/pytorch_build_definitions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ def gen_dependent_configs(xenial_parent_config):
191191
restrict_phases=["test"],
192192
gpu_resource=gpu,
193193
parent_build=xenial_parent_config,
194-
is_important=xenial_parent_config.is_important,
194+
is_important=False,
195195
)
196196

197197
configs.append(c)
@@ -353,7 +353,7 @@ def instantiate_configs():
353353
):
354354
c.dependent_tests = gen_docs_configs(c)
355355

356-
if cuda_version == "10.1" and python_version == "3.6" and not is_libtorch:
356+
if cuda_version == "10.2" and python_version == "3.6" and not is_libtorch:
357357
c.dependent_tests = gen_dependent_configs(c)
358358

359359
if (

.circleci/config.yml

Lines changed: 29 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,35 +1011,6 @@ jobs:
10111011
<<: *binary_checkout
10121012
- run:
10131013
<<: *binary_populate_env
1014-
- run:
1015-
name: Install unbuffer and ts
1016-
command: |
1017-
set -eux -o pipefail
1018-
source /env
1019-
OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release`
1020-
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
1021-
retry yum -q -y install epel-release
1022-
retry yum -q -y install expect moreutils
1023-
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
1024-
retry apt-get update
1025-
retry apt-get -y install expect moreutils
1026-
retry conda install -y -c eumetsat expect
1027-
retry conda install -y cmake
1028-
fi
1029-
- run:
1030-
name: Update compiler to devtoolset7
1031-
command: |
1032-
set -eux -o pipefail
1033-
source /env
1034-
if [[ "$DESIRED_DEVTOOLSET" == 'devtoolset7' ]]; then
1035-
source "/builder/update_compiler.sh"
1036-
1037-
# Env variables are not persisted into the next step
1038-
echo "export PATH=$PATH" >> /env
1039-
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> /env
1040-
else
1041-
echo "Not updating compiler"
1042-
fi
10431014
- run:
10441015
name: Build
10451016
no_output_timeout: "1h"
@@ -1059,7 +1030,6 @@ jobs:
10591030
python3 -mpip install requests && \
10601031
SCRIBE_GRAPHQL_ACCESS_TOKEN=${SCRIBE_GRAPHQL_ACCESS_TOKEN} \
10611032
python3 /pytorch/.circleci/scripts/upload_binary_size_to_scuba.py || exit 0
1062-
10631033
- persist_to_workspace:
10641034
root: /
10651035
paths: final_pkgs
@@ -6326,71 +6296,71 @@ workflows:
63266296
- /release\/.*/
63276297
build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7-build"
63286298
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7"
6299+
- pytorch_linux_build:
6300+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
6301+
requires:
6302+
- "docker-pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
6303+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-build"
6304+
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
6305+
- pytorch_linux_test:
6306+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test
6307+
requires:
6308+
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
6309+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-test"
6310+
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
6311+
use_cuda_docker_runtime: "1"
6312+
resource_class: gpu.medium
63296313
- pytorch_linux_test:
6330-
name: pytorch_linux_xenial_cuda10_1_cudnn7_py3_multigpu_test
6314+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_multigpu_test
63316315
requires:
6332-
- pytorch_linux_xenial_cuda10_1_cudnn7_py3_gcc7_build
6316+
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
63336317
filters:
63346318
branches:
63356319
only:
63366320
- master
63376321
- /ci-all\/.*/
63386322
- /release\/.*/
6339-
build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-py3-multigpu-test"
6340-
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7"
6323+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-multigpu-test"
6324+
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
63416325
use_cuda_docker_runtime: "1"
63426326
resource_class: gpu.large
63436327
- pytorch_linux_test:
6344-
name: pytorch_linux_xenial_cuda10_1_cudnn7_py3_nogpu_NO_AVX2_test
6328+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_nogpu_NO_AVX2_test
63456329
requires:
6346-
- pytorch_linux_xenial_cuda10_1_cudnn7_py3_gcc7_build
6330+
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
63476331
filters:
63486332
branches:
63496333
only:
63506334
- master
63516335
- /ci-all\/.*/
63526336
- /release\/.*/
6353-
build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-py3-nogpu-NO_AVX2-test"
6354-
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7"
6337+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-nogpu-NO_AVX2-test"
6338+
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
63556339
resource_class: large
63566340
- pytorch_linux_test:
6357-
name: pytorch_linux_xenial_cuda10_1_cudnn7_py3_nogpu_NO_AVX_test
6341+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_nogpu_NO_AVX_test
63586342
requires:
6359-
- pytorch_linux_xenial_cuda10_1_cudnn7_py3_gcc7_build
6343+
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
63606344
filters:
63616345
branches:
63626346
only:
63636347
- master
63646348
- /ci-all\/.*/
63656349
- /release\/.*/
6366-
build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-py3-nogpu-NO_AVX-test"
6367-
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7"
6350+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-nogpu-NO_AVX-test"
6351+
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
63686352
resource_class: large
63696353
- pytorch_linux_test:
6370-
name: pytorch_linux_xenial_cuda10_1_cudnn7_py3_slow_test
6354+
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_slow_test
63716355
requires:
6372-
- pytorch_linux_xenial_cuda10_1_cudnn7_py3_gcc7_build
6356+
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
63736357
filters:
63746358
branches:
63756359
only:
63766360
- master
63776361
- /ci-all\/.*/
63786362
- /release\/.*/
6379-
build_environment: "pytorch-linux-xenial-cuda10.1-cudnn7-py3-slow-test"
6380-
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7"
6381-
use_cuda_docker_runtime: "1"
6382-
resource_class: gpu.medium
6383-
- pytorch_linux_build:
6384-
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
6385-
requires:
6386-
- "docker-pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
6387-
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-build"
6388-
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
6389-
- pytorch_linux_test:
6390-
name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test
6391-
requires:
6392-
- pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
6393-
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-test"
6363+
build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-slow-test"
63946364
docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
63956365
use_cuda_docker_runtime: "1"
63966366
resource_class: gpu.medium

.circleci/scripts/binary_linux_build.sh

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ set -eux -o pipefail
55
source /env
66

77
# Defaults here so they can be changed in one place
8-
export MAX_JOBS=12
8+
export MAX_JOBS=${MAX_JOBS:-$(nproc --ignore=1)}
99

1010
# Parse the parameters
1111
if [[ "$PACKAGE_TYPE" == 'conda' ]]; then
@@ -16,15 +16,5 @@ else
1616
build_script='manywheel/build.sh'
1717
fi
1818

19-
# We want to call unbuffer, which calls tclsh which finds the expect
20-
# package. The expect was installed by yum into /usr/bin so we want to
21-
# find /usr/bin/tclsh, but this is shadowed by /opt/conda/bin/tclsh in
22-
# the conda docker images, so we prepend it to the path here.
23-
if [[ "$PACKAGE_TYPE" == 'conda' ]]; then
24-
mkdir /just_tclsh_bin
25-
ln -s /usr/bin/tclsh /just_tclsh_bin/tclsh
26-
export PATH=/just_tclsh_bin:$PATH
27-
fi
28-
2919
# Build the package
30-
SKIP_ALL_TESTS=1 unbuffer "/builder/$build_script" | ts
20+
SKIP_ALL_TESTS=1 stdbuf -i0 -o0 -e0 "/builder/$build_script"

.circleci/verbatim-sources/job-specs/binary-job-specs.yml

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,35 +7,6 @@
77
<<: *binary_checkout
88
- run:
99
<<: *binary_populate_env
10-
- run:
11-
name: Install unbuffer and ts
12-
command: |
13-
set -eux -o pipefail
14-
source /env
15-
OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release`
16-
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
17-
retry yum -q -y install epel-release
18-
retry yum -q -y install expect moreutils
19-
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
20-
retry apt-get update
21-
retry apt-get -y install expect moreutils
22-
retry conda install -y -c eumetsat expect
23-
retry conda install -y cmake
24-
fi
25-
- run:
26-
name: Update compiler to devtoolset7
27-
command: |
28-
set -eux -o pipefail
29-
source /env
30-
if [[ "$DESIRED_DEVTOOLSET" == 'devtoolset7' ]]; then
31-
source "/builder/update_compiler.sh"
32-
33-
# Env variables are not persisted into the next step
34-
echo "export PATH=$PATH" >> /env
35-
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> /env
36-
else
37-
echo "Not updating compiler"
38-
fi
3910
- run:
4011
name: Build
4112
no_output_timeout: "1h"
@@ -55,7 +26,6 @@
5526
python3 -mpip install requests && \
5627
SCRIBE_GRAPHQL_ACCESS_TOKEN=${SCRIBE_GRAPHQL_ACCESS_TOKEN} \
5728
python3 /pytorch/.circleci/scripts/upload_binary_size_to_scuba.py || exit 0
58-
5929
- persist_to_workspace:
6030
root: /
6131
paths: final_pkgs

.github/workflows/lint.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ jobs:
144144
# Run Clang-Tidy
145145
# The negative filters below are to exclude files that include onnx_pb.h or
146146
# caffe2_pb.h, otherwise we'd have to build protos as part of this CI job.
147+
# FunctionsManual.cpp is excluded to keep this diff clean. It will be fixed
148+
# in a follow up PR.
147149
python tools/clang_tidy.py \
148150
--verbose \
149151
--paths torch/csrc/ \
@@ -157,6 +159,7 @@ jobs:
157159
-g"-torch/csrc/onnx/init.cpp" \
158160
-g"-torch/csrc/cuda/nccl.*" \
159161
-g"-torch/csrc/cuda/python_nccl.cpp" \
162+
-g"-torch/csrc/autograd/FunctionsManual.cpp" \
160163
"$@" > ${GITHUB_WORKSPACE}/clang-tidy-output.txt
161164
162165
cat ${GITHUB_WORKSPACE}/clang-tidy-output.txt

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ torch/nn/functional.pyi
5757
torch/csrc/autograd/generated/*
5858
# Listed manually because some files in this directory are not generated
5959
torch/testing/_internal/generated/annotated_fn_args.py
60+
torch/testing/_internal/data/*.pt
6061
torch/csrc/cudnn/cuDNN.cpp
6162
torch/csrc/generated
6263
torch/csrc/generic/TensorMethods.cpp

CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ cmake_dependent_option(
136136
CAFFE2_USE_MSVC_STATIC_RUNTIME "Using MSVC static runtime libraries" ON
137137
"NOT BUILD_SHARED_LIBS" OFF)
138138
option(BUILD_TEST "Build C++ test binaries (need gtest and gbenchmark)" OFF)
139+
option(BUILD_STATIC_RUNTIME_BENCHMARK "Build C++ binaries for static runtime benchmarks (need gbenchmark)" OFF)
139140
option(BUILD_MOBILE_BENCHMARKS "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)" OFF)
140141
option(BUILD_MOBILE_TEST "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)" OFF)
141142
option(BUILD_JNI "Build JNI bindings" OFF)
@@ -609,6 +610,21 @@ if(USE_ASAN)
609610
string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fsanitize=address")
610611
endif()
611612

613+
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
614+
include(CheckCSourceCompiles)
615+
check_c_source_compiles("#include <arm_neon.h>
616+
int main() {
617+
float a[] = {1.0, 1.0};
618+
vld1q_f32_x2(a);
619+
return 0;
620+
}" HAS_VLD1)
621+
622+
if(NOT HAS_VLD1)
623+
string(APPEND CMAKE_CXX_FLAGS " -DMISSING_ARM_VLD1")
624+
endif()
625+
endif()
626+
627+
612628
# Add code coverage flags to supported compilers
613629
if(CODE_COVERAGE)
614630
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")

aten/src/ATen/BatchedFallback.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,15 @@ void batchedTensorForLoopFallback(const c10::OperatorHandle& op, torch::jit::Sta
5151
const auto& schema = op.schema();
5252
const auto num_returns = schema.returns().size();
5353
TORCH_CHECK(!schema.is_mutable() && !schema.hasAnyAliasInfo(),
54-
"Batching rule not implemented for ", schema, "; ",
54+
"Batching rule not implemented for ", schema.operator_name(), "; ",
5555
"the fallback path doesn't work on in-place or view ops.");
5656
TORCH_CHECK(areAllReturnsTensors(schema) && !areAnyArgumentsTensorList(schema),
57-
"Batching rule not implemented for ", schema, ". ",
57+
"Batching rule not implemented for ", schema.operator_name(), ". ",
5858
"We could not generate a fallback.");
5959
TORCH_CHECK(num_returns >= 1,
60-
"Batching rule not implemented for ", schema, ". ",
60+
"Batching rule not implemented for ", schema.operator_name(), ". ",
6161
"The fallback path does not support operations with no returns.");
62-
TORCH_WARN("Batching rule not implemented for ", schema, " falling back "
62+
TORCH_WARN("Batching rule not implemented for ", schema.operator_name(), " falling back "
6363
"to slow (for loop and stack) implementation");
6464

6565
const auto num_arguments = schema.arguments().size();

aten/src/ATen/Context.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,14 @@ void Context::alertNotDeterministic(c10::string_view const& caller) {
7878
}
7979
}
8080

81+
bool Context::allowTF32CuDNN() const {
82+
return allow_tf32_cudnn;
83+
}
84+
85+
void Context::setAllowTF32CuDNN(bool b) {
86+
allow_tf32_cudnn = b;
87+
}
88+
8189
static const char cublas_config_var_name[] = "CUBLAS_WORKSPACE_CONFIG";
8290
static const char* const cublas_deterministic_configs[] = { ":4096:8", ":16:8" };
8391

aten/src/ATen/Context.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ class CAFFE2_API Context {
115115
bool deterministic() const;
116116
void setDeterministic(bool);
117117
void alertNotDeterministic(c10::string_view const& caller);
118+
bool allowTF32CuDNN() const;
119+
void setAllowTF32CuDNN(bool);
118120
bool allowTF32CuBLAS() const;
119121
void setAllowTF32CuBLAS(bool);
120122
void alertCuBLASConfigNotDeterministic();
@@ -146,6 +148,7 @@ class CAFFE2_API Context {
146148
bool deterministic_cudnn = false;
147149
bool _deterministic = false;
148150
bool benchmark_cudnn = false;
151+
bool allow_tf32_cudnn = true;
149152
bool allow_tf32_cublas = true;
150153
bool enabled_mkldnn = true;
151154
#ifdef C10_MOBILE

aten/src/ATen/NumericUtils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ inline C10_HOST_DEVICE bool _isnan(T val) {
3434
}
3535

3636
template <typename T,
37-
typename std::enable_if<c10::is_complex_t<T>::value, int>::type = 0>
37+
typename std::enable_if<c10::is_complex<T>::value, int>::type = 0>
3838
inline bool _isnan(T val) {
3939
return std::isnan(val.real()) || std::isnan(val.imag());
4040
}

aten/src/ATen/autocast_mode.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,8 @@ TORCH_LIBRARY_IMPL(_, Autocast, m) {
255255
}
256256

257257
TORCH_LIBRARY_IMPL(aten, Autocast, m) {
258-
KERNEL(ADD_NS(_convolution), "_convolution", Tensor (const Tensor &, const Tensor &, const c10::optional<Tensor>&, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t, bool, bool, bool), fp16)
258+
KERNEL(ADD_NS(_convolution), "_convolution.deprecated", Tensor (const Tensor &, const Tensor &, const c10::optional<Tensor>&, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t, bool, bool, bool), fp16)
259+
KERNEL(ADD_NS(_convolution), "_convolution", Tensor (const Tensor &, const Tensor &, const c10::optional<Tensor>&, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t, bool, bool, bool, bool), fp16)
259260
KERNEL(ADD_NS(_convolution_nogroup), "_convolution_nogroup", Tensor (const Tensor &, const Tensor &, const c10::optional<Tensor>&, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef), fp16)
260261
KERNEL(ADD_NS(conv1d), "conv1d", Tensor (const Tensor &, const Tensor &, const c10::optional<Tensor>&, IntArrayRef, IntArrayRef, IntArrayRef, int64_t), fp16)
261262
KERNEL(ADD_NS(conv2d), "conv2d", Tensor (const Tensor &, const Tensor &, const c10::optional<Tensor>&, IntArrayRef, IntArrayRef, IntArrayRef, int64_t), fp16)
@@ -267,8 +268,10 @@ TORCH_LIBRARY_IMPL(aten, Autocast, m) {
267268
KERNEL(ADD_NS(convolution), "convolution", Tensor (const Tensor &, const Tensor &, const c10::optional<Tensor>&, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t), fp16)
268269
KERNEL(ADD_NS(cudnn_convolution), "cudnn_convolution.deprecated", Tensor (const Tensor &, const Tensor &, const c10::optional<Tensor>&, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool), fp16)
269270
KERNEL(ADD_NS(cudnn_convolution_transpose), "cudnn_convolution_transpose.deprecated", Tensor (const Tensor &, const Tensor &, const c10::optional<Tensor>&, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool), fp16)
270-
KERNEL(ADD_NS(cudnn_convolution), "cudnn_convolution", Tensor (const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool), fp16)
271-
KERNEL(ADD_NS(cudnn_convolution_transpose), "cudnn_convolution_transpose", Tensor (const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool), fp16)
271+
KERNEL(ADD_NS(cudnn_convolution), "cudnn_convolution.deprecated2", Tensor (const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool), fp16)
272+
KERNEL(ADD_NS(cudnn_convolution_transpose), "cudnn_convolution_transpose.deprecated2", Tensor (const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool), fp16)
273+
KERNEL(ADD_NS(cudnn_convolution), "cudnn_convolution", Tensor (const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool, bool), fp16)
274+
KERNEL(ADD_NS(cudnn_convolution_transpose), "cudnn_convolution_transpose", Tensor (const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool, bool), fp16)
272275
KERNEL(ADD_NS(prelu), "prelu", Tensor (const Tensor &, const Tensor &), fp16)
273276
KERNEL(ADD_NS(addmm), "addmm", Tensor (const Tensor &, const Tensor &, const Tensor &, Scalar, Scalar), fp16)
274277
KERNEL(ADD_NS(addmv), "addmv", Tensor (const Tensor &, const Tensor &, const Tensor &, Scalar, Scalar), fp16)

aten/src/ATen/core/aten_interned_strings.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,8 @@ _(aten, clamp_min) \
238238
_(aten, clone) \
239239
_(aten, coalesce) \
240240
_(aten, combinations) \
241+
_(aten, _conj) \
242+
_(aten, conj) \
241243
_(aten, complex) \
242244
_(aten, polar) \
243245
_(aten, constant_pad_nd) \

aten/src/ATen/core/jit_type.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,7 @@ struct CAFFE2_API SymbolicShape {
507507
};
508508

509509
template <typename T>
510-
struct CAFFE2_API VaryingShape {
510+
struct VaryingShape {
511511
using ListOfOptionalElements = std::vector<c10::optional<T>>;
512512
VaryingShape(const std::vector<T>& vec)
513513
: VaryingShape(ListOfOptionalElements(vec.begin(), vec.end())) {}
@@ -548,7 +548,7 @@ struct CAFFE2_API VaryingShape {
548548
return dims_;
549549
}
550550

551-
VaryingShape merge(const VaryingShape& other) const;
551+
CAFFE2_API VaryingShape merge(const VaryingShape& other) const;
552552

553553
c10::optional<std::vector<T>> concrete_sizes() const {
554554
if (!dims_) {

0 commit comments

Comments (0)