8000 Update on "switch aten_headers to use strip_include_prefix instead of… · pytorch/pytorch@f9701e8 · GitHub
[go: up one dir, main page]

Skip to content

Commit f9701e8

Browse files
author
Michael Andreas Dagitses
committed
Update on "switch aten_headers to use strip_include_prefix instead of includes"
This is Bazel best practices and easier to maintain. Differential Revision: [D36521515](https://our.internmc.facebook.com/intern/diff/D36521515/) [ghstack-poisoned]
2 parents 0859106 + 29ddcd1 commit f9701e8

File tree

274 files changed

+34781
-3456
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

274 files changed

+34781
-3456
lines changed

.circleci/docker/build_docker.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ tag="${DOCKER_TAG}"
1818

1919
registry="308535385114.dkr.ecr.us-east-1.amazonaws.com"
2020
image="${registry}/pytorch/${IMAGE_NAME}"
21+
ghcr_image="ghcr.io/pytorch/ci-image"
2122

2223
login() {
2324
aws ecr get-authorization-token --region us-east-1 --output text --query 'authorizationData[].authorizationToken' |
@@ -54,6 +55,13 @@ if [ "${DOCKER_SKIP_PUSH:-true}" = "false" ]; then
5455
if ! docker manifest inspect "${image}:${tag}" >/dev/null 2>/dev/null; then
5556
docker push "${image}:${tag}"
5657
fi
58+
59+
if [ "${PUSH_GHCR_IMAGE:-}" = "true" ]; then
60+
# Push docker image to the ghcr.io
61+
echo $GHCR_PAT | docker login ghcr.io -u pytorch --password-stdin
62+
docker tag "${image}:${tag}" "${ghcr_image}:${IMAGE_NAME}-${tag}"
63+
docker push "${ghcr_image}:${IMAGE_NAME}-${tag}"
64+
fi
5765
fi
5866

5967
if [ -z "${DOCKER_SKIP_S3_UPLOAD:-}" ]; then

.circleci/docker/common/install_base.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ install_ubuntu() {
6666
software-properties-common \
6767
wget \
6868
sudo \
69-
vim
69+
vim \
70+
jq
7071

7172
# Should resolve issues related to various apt package repository cert issues
7273
# see: https://github.com/pytorch/pytorch/issues/65931

.github/actions/calculate-docker-image/action.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ inputs:
2323
force_push:
2424
description: If set to any value, always run the push
2525
required: false
26+
push-ghcr-image:
27+
description: If set to any value, push docker image to the ghcr.io.
28+
required: false
2629

2730
outputs:
2831
docker-image:
@@ -102,6 +105,8 @@ runs:
102105
# Skip push if we don't need it, or if specified in the inputs
103106
DOCKER_SKIP_PUSH: ${{ steps.check.outputs.skip_push || inputs.skip_push }}
104107
DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker-tag }}
108+
PUSH_GHCR_IMAGE: ${{ inputs.push-ghcr-image }}
109+
GHCR_PAT: ${{ env.GHCR_PAT }}
105110
working-directory: .circleci/docker
106111
shell: bash
107112
run: |
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
ad21e440dc9e5cdd77785fe1ea9979a53262114e
1+
5f2f374d9bbc6374fe725a17182e7d4c270c6833

.github/scripts/trymerge.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -990,7 +990,7 @@ def fetch_check_run_conclusions(repo: GitRepo, commit: str) -> Dict[str, Tuple[s
990990
[owner, name] = repo.gh_owner_and_name()
991991
checks = fetch_json_dict(f'https://api.github.com/repos/{owner}/{name}/commits/{commit}/check-runs')
992992
check_run_conclusions = {}
993-
if len(checks) == 0:
993+
if len(checks['check_runs']) == 0:
994994
raise MandatoryChecksMissingError("Refusing to merge as land check(s) are not yet run")
995995
for check_run in checks['check_runs']:
996996
check_run_conclusions[check_run['name']] = (check_run['conclusion'],

.github/workflows/_android-build-test.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,6 @@ jobs:
7373
# 1) Not shareable: it's custom selective build, which is different from default libtorch mobile build;
7474
# 2) Not parallelizable by architecture: it only builds libtorch for one architecture;
7575
76-
echo "DOCKER_IMAGE: ${DOCKER_IMAGE}"
77-
time docker pull "${DOCKER_IMAGE}" >/dev/null
78-
7976
export BUILD_LITE_INTERPRETER
8077
BUILD_LITE_INTERPRETER="1"
8178
if [[ "${BUILD_ENVIRONMENT}" == *"full-jit" ]]; then

.github/workflows/docker-builds.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,14 @@ jobs:
6262
- name: Build docker image
6363
id: build-docker-image
6464
uses: ./.github/actions/calculate-docker-image
65+
env:
66+
GHCR_PAT: ${{ secrets.GHCR_PAT }}
6567
with:
6668
docker-image-name: ${{ matrix.docker-image-name }}
6769
always-rebuild: true
6870
skip_push: false
6971
force_push: true
72+
push-ghcr-image: ${{ github.event_name == 'push' }}
7073

7174
- name: Pull docker image
7275
uses: ./.github/actions/pull-docker-image

.github/workflows/trunk.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,6 @@ jobs:
223223
]}
224224
225225
linux-bionic-rocm5_1-py3_7-build:
226-
if: false
227226
name: linux-bionic-rocm5.1-py3.7
228227
uses: ./.github/workflows/_linux-build.yml
229228
with:

.jenkins/pytorch/common_utils.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,7 @@ function print_sccache_stats() {
144144
sccache --show-stats
145145

146146
if [[ -n "${OUR_GITHUB_JOB_ID}" ]]; then
147-
sccache --show-stats \
148-
| python -m tools.stats.sccache_stats_to_json \
147+
sccache --show-stats --stats-format json | jq .stats \
149148
> "sccache-stats-${BUILD_ENVIRONMENT}-${OUR_GITHUB_JOB_ID}.json"
150149
else
151150
echo "env var OUR_GITHUB_JOB_ID not set, will not write sccache stats to json"

.jenkins/pytorch/test.sh

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,33 @@ test_python() {
178178
assert_git_not_dirty
179179
}
180180

181+
182+
test_dynamo_shard() {
183+
if [[ -z "$NUM_TEST_SHARDS" ]]; then
184+
echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
185+
exit 1
186+
fi
187+
time python test/run_test.py \
188+
--exclude-jit-executor \
189+
--exclude-distributed-tests \
190+
--exclude \
191+
test_autograd \
192+
test_proxy_tensor \
193+
test_quantization \
194+
test_public_bindings \
195+
test_dataloader \
196+
test_reductions \
197+
test_namedtensor \
198+
test_namedtuple_return_api \
199+
test_profiler \
200+
test_profiler_tree \
201+
test_overrides \
202+
test_python_dispatch \
203+
--shard "$1" "$NUM_TEST_SHARDS" \
204+
--verbose
205+
assert_git_not_dirty
206+
}
207+
181208
test_python_gloo_with_tls() {
182209
source "$(dirname "${BASH_SOURCE[0]}")/run_glootls_test.sh"
183210
assert_git_not_dirty
@@ -599,6 +626,17 @@ elif [[ "$TEST_CONFIG" == distributed ]]; then
599626
if [[ "${SHARD_NUMBER}" == 1 ]]; then
600627
test_rpc
601628
fi
629+
elif [[ "${TEST_CONFIG}" == *dynamo* && "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
630+
test_without_numpy
631+
install_torchvision
632+
install_torchdynamo
633+
test_dynamo_shard 1
634+
test_aten
635+
elif [[ "${TEST_CONFIG}" == *dynamo* && "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHARDS -gt 1 ]]; then
636+
install_torchvision
637+
checkout_install_torchdynamo
638+
test_dynamo_shard 2
639+
test_dynamo
602640
elif [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
603641
test_without_numpy
604642
install_torchvision
@@ -614,7 +652,6 @@ elif [[ "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHARDS -gt 1 ]]; then
614652
test_custom_script_ops
615653
test_custom_backend
616654
test_torch_function_benchmark
617-
test_dynamo
618655
elif [[ "${SHARD_NUMBER}" -gt 2 ]]; then
619656
# Handle arbitrary number of shards
620657
install_torchdynamo

.jenkins/pytorch/win-test-helpers/build_pytorch.bat

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,5 +154,5 @@ python setup.py install --cmake && sccache --show-stats && (
154154
)
155155
)
156156

157-
sccache --show-stats | python tools/stats/sccache_stats_to_json.py > sccache-stats-%BUILD_ENVIRONMENT%-%OUR_GITHUB_JOB_ID%.json
157+
sccache --show-stats --stats-format json | jq .stats > sccache-stats-%BUILD_ENVIRONMENT%-%OUR_GITHUB_JOB_ID%.json
158158
sccache --stop-server

aten/src/ATen/Context.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,14 @@ void Context::setBenchmarkCuDNN(bool b) {
144144
benchmark_cudnn = b;
145145
}
146146

147+
int Context::benchmarkLimitCuDNN() const {
148+
return benchmark_limit_cudnn;
149+
}
150+
151+
void Context::setBenchmarkLimitCuDNN(int b) {
152+
benchmark_limit_cudnn = b;
153+
}
154+
147155
bool Context::allowTF32CuBLAS() const {
148156
static bool allow_tf32_cublas_override = c10::utils::check_env("TORCH_ALLOW_TF32_CUBLAS_OVERRIDE") == true;
149157
return allow_tf32_cublas_override || float32_matmul_precision != at::Float32MatmulPrecision::HIGHEST;

aten/src/ATen/Context.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ class TORCH_API Context {
121121
void setUserEnabledMkldnn(bool e);
122122
bool benchmarkCuDNN() const;
123123
void setBenchmarkCuDNN(bool);
124+
int benchmarkLimitCuDNN() const;
125+
void setBenchmarkLimitCuDNN(int);
124126
bool deterministicCuDNN() const;
125127
void setDeterministicCuDNN(bool);
126128

@@ -254,6 +256,7 @@ class TORCH_API Context {
254256
bool benchmark_cudnn = false;
255257
Float32MatmulPrecision float32_matmul_precision =
256258
at::Float32MatmulPrecision::HIGHEST;
259+
int benchmark_limit_cudnn = 10;
257260
bool allow_tf32_cudnn = true;
258261
bool allow_fp16_reduction_cublas = true;
259262
bool enabled_mkldnn = true;

aten/src/ATen/FunctionalizeFallbackKernel.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ const at::Tensor & resize__functionalization(c10::DispatchKeySet dispatchKeySet,
131131
at::Tensor tmp_output;
132132
{
133133
at::AutoDispatchSkipFunctionalize guard;
134-
tmp_output = at::resize_functional(self_, size, memory_format);
134+
tmp_output = at::resize(self_, size, memory_format);
135135
}
136136

137137
auto itemsize = self.dtype().itemsize();

aten/src/ATen/NestedTensorImpl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ struct TORCH_API NestedTensorImpl : public c10::TensorImpl {
6868
int64_t size_custom(int64_t d) const override {
6969
return this->size(d);
7070
}
71+
c10::SymInt sym_size_custom(int64_t d) const override {
72+
return c10::SymInt{this->size(d)};
73+
}
7174
IntArrayRef sizes_custom() const override;
7275
c10::SymIntArrayRef sym_sizes_custom() const override;
7376
c10::SymIntArrayRef sym_sizes() const override;

aten/src/ATen/TensorIterator.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -946,6 +946,8 @@ void TensorIteratorBase::build_ternary_op(
946946
const TensorBase& out, const TensorBase& a,
947947
const TensorBase& b, const TensorBase& c) {
948948
build(TensorIteratorConfig()
949+
.promote_inputs_to_common_dtype(true)
950+
.enforce_safe_casting_to_output(true)
949951
.add_owned_output(out)
950952
.add_owned_input(a)
951953
.add_owned_input(b)

aten/src/ATen/core/PythonFallbackKernel.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ void pythonFallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
5353
// If Torch Dispatch Mode is active, use its PyInterpreter for dispatch
5454
const auto& maybe_torch_dispatch_mode_state = at::impl::TorchDispatchModeTLS::get_state();
5555
if (maybe_torch_dispatch_mode_state) {
56-
maybe_torch_dispatch_mode_state->pyinterpreter()->dispatch(op, stack, maybe_torch_dispatch_mode_state);
56+
maybe_torch_dispatch_mode_state->pyinterpreter()->dispatch(op, stack);
5757
return;
5858
}
5959

@@ -69,7 +69,7 @@ void pythonFallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
6969
if (ivalue.isTensor()) {
7070
auto* interpreter = ivalue.unsafeToTensorImpl()->pyobj_interpreter();
7171
if (interpreter) {
72-
interpreter->dispatch(op, stack, nullptr);
72+
interpreter->dispatch(op, stack);
7373
return;
7474
}
7575
} else if (ivalue.isTensorList()) {
@@ -78,7 +78,7 @@ void pythonFallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
7878
for (const auto& nv : ivalue.toListRef()) {
7979
auto* interpreter = nv.unsafeToTensorImpl()->pyobj_interpreter();
8080
if (interpreter) {
81-
interpreter->dispatch(op, stack, nullptr);
81+
interpreter->dispatch(op, stack);
8282
return;
8383
}
8484
}

aten/src/ATen/core/TensorBase.h

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,7 @@ class TORCH_API TensorBase {
157157
}
158158

159159
c10::SymInt sym_size(int64_t dim) const {
160-
const auto sizes = this->sym_sizes();
161-
const auto ndim = static_cast<int64_t>(sizes.size());
162-
// false is passed to maybe_wrap_dim so behavior is identical to array access (but with wrapping)
163-
return sizes[c10::maybe_wrap_dim(dim, ndim, /*wrap_scalar=*/false)];
164-
160+
return impl_->sym_size(dim);
165161
}
166162

167163
int64_t size(int64_t dim) const {
@@ -349,7 +345,7 @@ class TORCH_API TensorBase {
349345
}
350346

351347
/// Returns a `Tensor`'s layout.
352-
Layout layout() const noexcept {
348+
Layout layout() const {
353349
return impl_->layout();
354350
}
355351

aten/src/ATen/mps/MPSAllocator.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,18 @@ struct HeapBlock;
6363
struct BufferBlock
6464
{
6565
id<MTLBuffer> buffer;
66-
size_t size;
66+
size_t size; // size after alignment
67+
size_t requested_size; // requested size (before alignment)
68+
// buffer shape is used for retrieving base of views in cached graphs
69+
std::vector<int64_t> shape;
6770
bool in_use;
6871
HeapBlock* heap;
6972
id_t buf_id;
7073

71-
BufferBlock(size_t Size, const id<MTLBuffer> Buffer = nullptr, HeapBlock* Heap = nullptr, id_t BufID = 0) :
72-
buffer(Buffer), size(Size), in_use(false), heap(Heap), buf_id(BufID) { }
74+
BufferBlock(size_t Size, size_t RequestedSize = 0, const id<MTLBuffer> Buffer = nullptr,
75+
HeapBlock* Heap = nullptr, id_t BufID = 0) :
76+
buffer(Buffer), size(Size), requested_size(RequestedSize),
77+
in_use(false), heap(Heap), buf_id(BufID) { }
7378

7479
static bool Comparator(const BufferBlock* a, const BufferBlock* b) {
7580
return (a->size != b->size) ? a->size < b->size : (uintptr_t)a->buffer < (uintptr_t)b->buffer;
@@ -193,6 +198,9 @@ class MPSHeapAllocatorImpl
193198
void Free(void* ptr);
194199
void EmptyCache();
195200
bool isSharedBuffer(void* ptr);
201+
ssize_t getRequestedBufferSize(void* ptr);
202+
void setBufferShape(void* ptr, const IntArrayRef& shape);
203+
IntArrayRef getBufferShape(void* ptr);
196204

197205
inline id<MTLDevice> Device() const { return m_device; }
198206
void enable_debug_info() { m_enable_debug_info = true; }

0 commit comments

Comments
 (0)
0