Update on "switch aten_headers to use strip_include_prefix instead of includes" · pytorch/pytorch@677fb76 · GitHub

Commit 677fb76

Michael Andreas Dagitses committed
Update on "switch aten_headers to use strip_include_prefix instead of includes"
This follows Bazel best practices and is easier to maintain.

Differential Revision: [D36521515](https://our.internmc.facebook.com/intern/diff/D36521515/)

[ghstack-poisoned]
2 parents d857401 + dca2d62 commit 677fb76

File tree

16 files changed: +145 -25 lines

.github/workflows/lint.yml

Lines changed: 1 addition & 1 deletion
@@ -100,7 +100,7 @@ jobs:
       if: ${{ always() && steps.requirements.outcome == 'success' }}
       run: |
         set -eux
-        python torch/testing/_check_kernel_launches.py |& tee "${GITHUB_WORKSPACE}"/cuda_kernel_launch_checks.txt
+        python torch/testing/_internal/check_kernel_launches.py |& tee "${GITHUB_WORKSPACE}"/cuda_kernel_launch_checks.txt

   workflow-checks:
     name: workflow-checks

BUILD.bazel

Lines changed: 15 additions & 0 deletions
@@ -241,10 +241,25 @@ cc_library(
     strip_include_prefix = "aten/src/",
     deps = [
         ":aten_core_headers",
+        ":torch_base_headers",
         "//c10:headers",
     ],
 )

+# Temporary library to enable us to use strip_include_prefix =
+# "aten/src/" in aten_headers above. Only use this in aten_headers.
+cc_library(
+    name = "torch_base_headers",
+    hdrs = [
+        "torch/csrc/Export.h",
+        "torch/csrc/jit/frontend/function_schema_parser.h",
+    ],
+    deps = [
+        "//c10/macros",
+        "//c10/util:base",
+    ],
+)
+
 ATEN_COPTS = COMMON_COPTS + [
     "-DCAFFE2_BUILD_MAIN_LIBS",
     "-DHAVE_AVX_CPU_DEFINITION",

aten/src/ATen/core/TensorBase.h

Lines changed: 1 addition & 1 deletion
@@ -221,7 +221,7 @@ class TORCH_API TensorBase {
     return impl_->sizes();
   }
   c10::SymIntArrayRef sym_sizes() const {
-    return c10::SymIntArrayRef(reinterpret_cast<const SymInt*>(sizes().data()), sizes().size());
+    return impl_->sym_sizes();
   }
   IntArrayRef strides() const {
     return impl_->strides();
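
Why the delegation matters: sizes() on a lazy tensor yields concrete ints, so the old reinterpret_cast could only ever produce non-symbolic SymInts; routing through impl_->sym_sizes() lets LTCTensorImpl return genuinely symbolic dimensions. A hedged usage sketch, assuming a LazyTensor-enabled build (it mirrors the NarrowCopyViaSymSizes test added further down):

// Hypothetical usage; requires a LazyTensor-enabled build of PyTorch.
#include <torch/torch.h>

int main() {
  auto x = torch::rand({10}).to(at::kLazy);
  // With this change, sym_sizes() dispatches to LTCTensorImpl and each
  // element is a c10::SymInt backed by a size IR node, not a plain int.
  c10::SymInt s = x.sym_sizes()[0];
  return s.is_symbolic() ? 0 : 1;
}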

c10/core/SymInt.cpp

Lines changed: 5 additions & 3 deletions
@@ -7,12 +7,14 @@ namespace c10 {
 std::shared_ptr<SymbolicIntNode> SymInt::toSymbolicIntNode() {
   auto& st = getSymIntTable();
   TORCH_CHECK(is_symbolic());
-  return st.getNode(SymInt::SYM_TAG_MASK ^ static_cast<uint64_t>(data_));
+  return st.getNode(static_cast<uint64_t>(data_) & ~MASK);
 }

 c10::SymInt SymInt::toSymInt(std::shared_ptr<SymbolicIntNode> sin_sp) {
   auto& sit = getSymIntTable();
-  auto data = sit.addNode(sin_sp) | SYM_TAG_MASK;
-  return c10::SymInt(data);
+  uint64_t idx = sit.addNode(sin_sp);
+  TORCH_CHECK(idx < MAX_SYM_IDX, "SymbolicIntNode index overflow: ", idx);
+  uint64_t data = idx | IS_SYM;
+  return c10::SymInt(static_cast<int64_t>(data));
 }
 } // namespace c10
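
For intuition, the tag/untag arithmetic can be reproduced standalone. This sketch copies the constants from c10/core/SymInt.h (they are private there) and checks that `| IS_SYM` and `& ~MASK` are inverses for any index below MAX_SYM_IDX:

#include <cassert>
#include <cstdint>

// Copied from c10/core/SymInt.h for illustration only.
constexpr uint64_t MASK = 1ULL << 63 | 1ULL << 62; // top two bits
constexpr uint64_t IS_SYM = 1ULL << 63;            // the 0b10... tag
constexpr uint64_t MAX_SYM_IDX = 1ULL << 62;

int main() {
  const uint64_t idx = 42; // a SymIntTable slot, must be < MAX_SYM_IDX
  assert(idx < MAX_SYM_IDX);

  // toSymInt: tag the index (sets bit 63, leaves bit 62 clear -> 0b10...).
  const int64_t data = static_cast<int64_t>(idx | IS_SYM);

  // is_symbolic: the top two bits are exactly the 0b10... pattern.
  assert((static_cast<uint64_t>(data) & MASK) == IS_SYM);

  // toSymbolicIntNode: clear the top two bits to recover the index.
  assert((static_cast<uint64_t>(data) & ~MASK) == idx);
  return 0;
}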

c10/core/SymInt.h

Lines changed: 25 additions & 7 deletions
@@ -25,10 +25,6 @@ class SymbolicIntNode;
 // SymInt will be extenteded to represent a union structure Union[int64_t,
 // SymbolicIntNode*] which will be implemented as a single packed int64_t field
 // named data_.
-//
-// data_ can be either a plain int64_t or (1 << 63 | `index`). `index` points to
-// SymbolicIntNode* that will be responsible for constructing an IR node for
-// a traced operation to represent it in LTC or Fx graphs.
 class C10_API SymInt {
  public:
  explicit SymInt(int64_t d) : data_(d){};
@@ -39,8 +35,7 @@
   }

   bool is_symbolic() const {
-    return static_cast<uint64_t>(SYM_TAG_MASK) &
-        static_cast<uint64_t>(this->data_);
+    return (MASK & static_cast<uint64_t>(this->data_)) == IS_SYM;
   }

   bool operator==(const SymInt& p2) const {
@@ -62,8 +57,31 @@
     return data_;
   }

+  // Return whether the integer is representable as a SymInt.
+  static bool check_range(int64_t i) {
+    return i > MIN_INT;
+  }
+
  private:
-  const static int64_t SYM_TAG_MASK = 1LL << 63;
+  // Constraints on the internal representation:
+  // - Should represent positive and negative ints
+  // - No conversion necessary for operations on ints.
+  // - We reserve some values to act as indices into our sym int table.
+  //
+  // So, the scheme is to reserve large negative numbers:
+  // - 0b0.... means we are a positive int (following two's complement)
+  // - 0b11... means we are a negative int (following two's complement)
+  // - 0b10... means we are index into the sym table. This means that
+  //   [-2^63, -2^62-1] are not representable as ints.
+  static constexpr uint64_t MASK = 1ULL << 63 | 1ULL << 62;
+  static constexpr uint64_t IS_SYM = 1ULL << 63;
+  // Since we use the top two bits to determine whether something is symbolic,
+  // we cannot represent symbolic indices that are large enough to use those
+  // bits. This will probably never happen.
+  static constexpr uint64_t MAX_SYM_IDX = 1ULL << 62;
+  // Since 0b10... is reserved for symbolic indices, any integers lower than
+  // this value would collide with our representation.
+  static constexpr int64_t MIN_INT = -1LL & ~(1ULL << 62);
   int64_t data_;
 };
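
A standalone sketch of how the top-two-bit scheme partitions the int64_t range (constants copied from the class above; the free function mirrors SymInt::is_symbolic()):

#include <cassert>
#include <cstdint>

constexpr uint64_t MASK = 1ULL << 63 | 1ULL << 62;
constexpr uint64_t IS_SYM = 1ULL << 63;

// Mirrors SymInt::is_symbolic() on a raw int64_t payload.
bool is_symbolic(int64_t data) {
  return (MASK & static_cast<uint64_t>(data)) == IS_SYM;
}

int main() {
  assert(!is_symbolic(0));         // top bits 00: small positive int
  assert(!is_symbolic(INT64_MAX)); // top bits 01: still a positive int
  assert(!is_symbolic(-1));        // top bits 11: negative int
  assert(is_symbolic(INT64_MIN));  // top bits 10: reserved for sym indices
  return 0;
}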

c10/core/SymIntTable.cpp

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@

 namespace c10 {

-int64_t SymIntTable::addNode(std::shared_ptr<SymbolicIntNode> sin) {
+uint64_t SymIntTable::addNode(std::shared_ptr<SymbolicIntNode> sin) {
   std::lock_guard<std::mutex> lock(mutex_);
   auto index = nodes_.size();
   nodes_.push_back(sin);
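
The signature change from int64_t to uint64_t matches what the function actually returns: a vector index, which is never negative. A minimal sketch of the index-allocation pattern (TableLike is a hypothetical stand-in; the real SymIntTable stores std::shared_ptr<SymbolicIntNode> and also offers getNode(index)):

#include <cassert>
#include <cstdint>
#include <mutex>
#include <vector>

struct TableLike {
  uint64_t add(int value) {
    std::lock_guard<std::mutex> lock(mutex_);
    auto index = items_.size(); // slot we are about to fill
    items_.push_back(value);
    return index; // uint64_t: a table index is never negative
  }

 private:
  std::vector<int> items_;
  std::mutex mutex_;
};

int main() {
  TableLike t;
  assert(t.add(7) == 0);
  assert(t.add(9) == 1); // indices grow monotonically under the lock
  return 0;
}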

c10/core/SymbolicIntNode.h

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ class C10_API SymbolicIntNode

 class C10_API SymIntTable {
  public:
-  int64_t addNode(std::shared_ptr<SymbolicIntNode> sin);
+  uint64_t addNode(std::shared_ptr<SymbolicIntNode> sin);
   std::shared_ptr<SymbolicIntNode> getNode(size_t index);

 private:

c10/core/TensorImpl.cpp

Lines changed: 8 additions & 0 deletions
@@ -2,6 +2,7 @@

 #include <c10/core/Backend.h>
 #include <c10/core/InferenceMode.h>
+#include <c10/core/SymIntArrayRef.h>
 #include <c10/core/WrapDimMinimal.h>
 #include <c10/core/impl/LocalDispatchKeySet.h>
 #include <c10/util/Optional.h>
@@ -371,6 +372,13 @@ IntArrayRef TensorImpl::sizes_custom() const {
   TORCH_CHECK(
       false, "Tensors of type ", tensorimpl_type_name(), " do not have sizes");
 }
+c10::SymIntArrayRef TensorImpl::sym_sizes_custom() const {
+  TORCH_CHECK(
+      false,
+      "Tensors of type ",
+      tensorimpl_type_name(),
+      " do not have sym sizes");
+}
 IntArrayRef TensorImpl::strides_custom() const {
   TORCH_CHECK(
       false,

c10/core/TensorImpl.h

Lines changed: 15 additions & 0 deletions
@@ -548,6 +548,15 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
     return sizes_default();
   }

+  c10::SymIntArrayRef sym_sizes() const {
+    if (C10_UNLIKELY(
+            sizes_strides_policy_ >=
+            static_cast<uint8_t>(SizesStridesPolicy::CustomSizes))) {
+      return sym_sizes_custom();
+    }
+    return sym_sizes_default();
+  }
+
   /**
    * Return a reference to the strides of this tensor. This reference remains
    * valid as long as the tensor is live and not restrided.
@@ -655,6 +664,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
   virtual bool is_contiguous_custom(at::MemoryFormat memory_format) const;
   // sizes_strides_policy_ >= CustomSizes
   virtual IntArrayRef sizes_custom() const;
+  virtual c10::SymIntArrayRef sym_sizes_custom() const;
   virtual int64_t dim_custom() const;
   virtual int64_t numel_custom() const;

@@ -675,6 +685,11 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
   inline IntArrayRef sizes_default() const {
     return sizes_and_strides_.sizes_arrayref();
   }
+  inline c10::SymIntArrayRef sym_sizes_default() const {
+    return c10::SymIntArrayRef(
+        reinterpret_cast<const c10::SymInt*>(sizes_and_strides_.sizes_data()),
+        sizes_and_strides_.size());
+  }
   inline int64_t dim_default() const {
     return sizes_and_strides_.size();
   }
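
sym_sizes() follows the same default/custom split as the existing sizes() accessor: a cheap inline policy check on the hot path, with a virtual *_custom() hook for opted-in subclasses such as LTCTensorImpl. A simplified, hypothetical model of the pattern (Impl, LazyImpl, and Policy are stand-ins, not PyTorch types):

#include <cstdint>
#include <iostream>

struct Impl {
  enum class Policy : uint8_t { Default = 0, CustomSizes = 2 };

  int64_t dim() const {
    // Hot path: one byte compare, no virtual call for ordinary tensors.
    if (policy_ >= Policy::CustomSizes) {
      return dim_custom();
    }
    return dim_default();
  }
  virtual int64_t dim_custom() const { return dim_default(); }
  int64_t dim_default() const { return 4; }
  virtual ~Impl() = default;

  Policy policy_ = Policy::Default;
};

struct LazyImpl : Impl {
  LazyImpl() { policy_ = Policy::CustomSizes; }
  int64_t dim_custom() const override { return 2; } // backend-specific answer
};

int main() {
  Impl a;
  LazyImpl b;
  std::cout << a.dim() << " " << b.dim() << "\n"; // prints: 4 2
  return 0;
}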

c10/test/core/SymInt_test.cpp

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+#include <gtest/gtest.h>
+
+#include <c10/core/SymInt.h>
+#include <c10/core/SymbolicIntNode.h>
+
+using namespace c10;
+
+void check(int64_t value) {
+  EXPECT_TRUE(SymInt::check_range(value));
+  const auto i = SymInt(value);
+  EXPECT_FALSE(i.is_symbolic());
+  EXPECT_EQ(i.data(), value);
+}
+
+TEST(SymIntTest, ConcreteInts) {
+  check(INT64_MAX);
+  check(0);
+  check(-1);
+  // This is 2^62, which is the most negative number we can support.
+  check(-4611686018427387904LL);
+}
+
+TEST(SymIntTest, AddNode) {
+  auto n = std::make_shared<SymbolicIntNode>();
+  auto i = n->toSymInt();
+  EXPECT_TRUE(i.is_symbolic());
+}
+
+TEST(SymIntTest, CheckRange) {
+  EXPECT_FALSE(SymInt::check_range(INT64_MIN));
+}

caffe2/core/tensor.h

Lines changed: 1 addition & 2 deletions
@@ -430,8 +430,7 @@ class TORCH_API Tensor final {
   }

   inline c10::SymIntArrayRef sym_sizes() const {
-    auto sizes = impl_.get()->sizes();
-    return c10::SymIntArrayRef(reinterpret_cast<const c10::SymInt*>(sizes.data()), sizes.size());
+    return impl_->sym_sizes();
   }

   inline int64_t size_from_dim(int k) const {

test/cpp/lazy/test_lazy_ops.cpp

Lines changed: 13 additions & 0 deletions
@@ -94,6 +94,19 @@ TEST(LazyDynamicOpsTest, NarrowCopy) {
   AllClose(z.cpu(), x.cpu().narrow_copy(X_DIM_INDEX, 0, Y_DIM));
 }

+TEST(LazyDynamicOpsTest, NarrowCopyViaSymSizes) {
+  auto xc = torch::rand({10});
+  auto x = xc.to(kLazy);
+  const size_t Y_DIM = 3;
+  const size_t X_DIM_INDEX = 0;
+  auto y = torch::rand({Y_DIM}).to(kLazy);
+  auto z = x.narrow_copy(X_DIM_INDEX, 0, y.sym_sizes()[0]);
+  auto zc = xc.narrow_copy(X_DIM_INDEX, 0, Y_DIM);
+  ASSERT_EQ(z.sizes()[0], xc.sizes()[0]); // note, xc not zc
+  // shape inference assumes narrow_copy can copy the whole tensor
+  AllClose(z.cpu(), zc);
+}
+
 TEST_F(LazyOpsTest, TestScalarTensor) {
   torch::Tensor scalar_tensor = torch::scalar_tensor(
       1., torch::TensorOptions(torch::kFloat).device(DefaultDevice()));

test/lazy/test_reuse_ir.py

Lines changed: 3 additions & 4 deletions
@@ -37,7 +37,7 @@ def testAdd(self):
         torch._lazy.mark_step()

         torch.testing.assert_close(z.cpu(), z_lazy.cpu())
-        assert metrics.counter_value("IrNodeReused_torch::lazy::AddTensor") >= 16
+        assert metrics.counter_value("IrNodeReused_torch::lazy::AddTensor") >= 14
         metrics.reset()
         torch._lazy.ir_cache.reset()

@@ -66,8 +66,7 @@ def testAddSub(self):
         torch._lazy.mark_step()

         torch.testing.assert_close(z.cpu(), z_lazy.cpu())
-        assert metrics.counter_value("IrNodeReused_torch::lazy::AddTensor") >= 10
-        assert metrics.counter_value("IrNodeReused_torch::lazy::AddTensor") >= 4
+        assert metrics.counter_value("IrNodeReused_torch::lazy::AddTensor") >= 8
         metrics.reset()
         torch._lazy.ir_cache.reset()

@@ -97,7 +96,7 @@ def testAddSubFallback(self):
         torch._lazy.mark_step()

         torch.testing.assert_close(z.cpu(), z_lazy.cpu())
-        assert metrics.counter_value("IrNodeReused_torch::lazy::AddTensor") >= 11
+        assert metrics.counter_value("IrNodeReused_torch::lazy::AddTensor") >= 8
         metrics.reset()
         torch._lazy.ir_cache.reset()
         torch._lazy.config.set_force_fallback("")

torch/__init__.py

Lines changed: 8 additions & 5 deletions
@@ -817,11 +817,6 @@ def _assert(condition, message):
 from torch import fft as fft
 from torch import futures as futures
 from torch import nn as nn
-import torch.nn.intrinsic
-import torch.nn.quantizable
-import torch.nn.quantized
-# AO depends on nn, as well as quantized stuff -- so should be after those.
-from torch import ao as ao
 from torch import optim as optim
 import torch.optim._multi_tensor
 from torch import multiprocessing as multiprocessing
@@ -847,6 +842,14 @@ def _assert(condition, message):
 from torch import __future__ as __future__
 from torch import profiler as profiler

+# Quantized, sparse, AO, etc. should be last to get imported, as nothing
+# is expected to depend on them.
+import torch.nn.intrinsic
+import torch.nn.quantizable
+import torch.nn.quantized
+# AO depends on nn, as well as quantized stuff -- so should be after those.
+from torch import ao as ao
+
 _C._init_names(list(torch._storage_classes))

 # attach docstrings to torch and tensor functions

torch/csrc/lazy/core/tensor_impl.cpp

Lines changed: 13 additions & 0 deletions
@@ -6,6 +6,7 @@
 #include <c10/macros/Macros.h>
 #include <c10/util/irange.h>
 #include <torch/csrc/lazy/core/tensor_util.h>
+#include <torch/csrc/lazy/core/ir_builder.h>

 namespace torch {
 namespace lazy {
@@ -83,6 +84,14 @@ LTCTensorImpl::LTCTensorImpl(LazyTensor&& tensor)
   // according to https://github.com/pytorch/xla/pull/2682.
   is_non_overlapping_and_dense_ = false;
   set_sizes_strides_policy(SizesStridesPolicy::CustomSizes);
+
+  auto rank = tensor_->shape().Get().sizes().size();
+  sym_sizes_.reserve(rank);
+  for (auto i : c10::irange(rank)) {
+    auto dim_node = getBackend()->GetIrBuilder()->MakeSizeNode(this->tensor_->GetIrValue(), i);
+    auto sn = std::make_shared<torch::lazy::SymbolicIntNode>(dim_node);
+    sym_sizes_.push_back(sn->toSymInt());
+  }
 }

 void LTCTensorImpl::set_tensor(const LazyTensorPtr& lazy_tensor) {
@@ -127,6 +136,10 @@ void LTCTensorImpl::shallow_copy_from(
   generation_ = 0;
 }

+c10::SymIntArrayRef LTCTensorImpl::sym_sizes_custom() const {
+  return c10::SymIntArrayRef(sym_sizes_.data(), sym_sizes_.size());
+}
+
 void LTCTensorImpl::setup_size_properties() {
   size_t generation = tensor_->generation();
   if (generation != generation_) {
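
One detail worth noting: sym_sizes_custom() hands out a non-owning c10::SymIntArrayRef over sym_sizes_, which is why the vector is built once in the constructor and kept alive on the impl; the view must not outlive its backing storage. A minimal sketch of that view-over-cached-storage pattern (ArrayRefLike and ImplLike are hypothetical stand-ins):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Stand-in for c10::ArrayRef<T>: a non-owning (pointer, length) view.
template <typename T>
struct ArrayRefLike {
  const T* data;
  size_t len;
};

struct ImplLike {
  // Populated once at construction; must outlive any view handed out.
  std::vector<int64_t> cached_sizes{10, 3};

  ArrayRefLike<int64_t> sizes_view() const {
    return {cached_sizes.data(), cached_sizes.size()};
  }
};

int main() {
  ImplLike impl;
  auto view = impl.sizes_view();
  assert(view.len == 2 && view.data[0] == 10);
  return 0;
}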

torch/csrc/lazy/core/tensor_impl.h

Lines changed: 4 additions & 0 deletions
@@ -1,6 +1,7 @@
 #pragma once

 #include <ATen/Tensor.h>
+#include <c10/core/SymIntArrayRef.h>
 #include <c10/core/TensorImpl.h>

 #include <torch/csrc/lazy/core/tensor.h>
@@ -38,6 +39,8 @@ class TORCH_API LTCTensorImpl final : public c10::TensorImpl {
   int64_t numel_custom() const override;
   bool is_contiguous_custom(at::MemoryFormat memory_format) const override;

+  virtual c10::SymIntArrayRef sym_sizes_custom() const override;
+
 #ifndef C10_DISABLE_TENSORIMPL_EXTENSIBILITY
   const at::Storage& storage() const override { return tensor_->Storage(); }
   bool has_storage() const override { return tensor_->Storage(); }
@@ -47,6 +50,7 @@ class TORCH_API LTCTensorImpl final : public c10::TensorImpl {
   void setup_size_properties();

   LazyTensorPtr tensor_;
+  std::vector<c10::SymInt> sym_sizes_;
   size_t generation_ {0};
 };

0 commit comments