Update on "switch aten_headers to use strip_include_prefix instead of includes" · pytorch/pytorch@677fb76 · GitHub

Commit 677fb76

Michael Andreas Dagitses committed
Update on "switch aten_headers to use strip_include_prefix instead of includes"
This follows Bazel best practices and is easier to maintain.

Differential Revision: [D36521515](https://our.internmc.facebook.com/intern/diff/D36521515/)

[ghstack-poisoned]
2 parents d857401 + dca2d62 commit 677fb76

File tree

16 files changed: +145 -25 lines

.github/workflows/lint.yml

Lines changed: 1 addition & 1 deletion
@@ -100,7 +100,7 @@ jobs:
       if: ${{ always() && steps.requirements.outcome == 'success' }}
       run: |
         set -eux
-        python torch/testing/_check_kernel_launches.py |& tee "${GITHUB_WORKSPACE}"/cuda_kernel_launch_checks.txt
+        python torch/testing/_internal/check_kernel_launches.py |& tee "${GITHUB_WORKSPACE}"/cuda_kernel_launch_checks.txt

   workflow-checks:
     name: workflow-checks

BUILD.bazel

Lines changed: 15 additions & 0 deletions
@@ -241,10 +241,25 @@ cc_library(
     strip_include_prefix = "aten/src/",
     deps = [
         ":aten_core_headers",
+        ":torch_base_headers",
         "//c10:headers",
     ],
 )

+# Temporary library to enable us to use strip_include_prefix =
+# "aten/src/" in aten_headers above. Only use this in aten_headers.
+cc_library(
+    name = "torch_base_headers",
+    hdrs = [
+        "torch/csrc/Export.h",
+        "torch/csrc/jit/frontend/function_schema_parser.h",
+    ],
+    deps = [
+        "//c10/macros",
+        "//c10/util:base",
+    ],
+)
+
 ATEN_COPTS = COMMON_COPTS + [
     "-DCAFFE2_BUILD_MAIN_LIBS",
     "-DHAVE_AVX_CPU_DEFINITION",

aten/src/ATen/core/TensorBase.h

Lines changed: 1 addition & 1 deletion
@@ -221,7 +221,7 @@ class TORCH_API TensorBase {
     return impl_->sizes();
   }
   c10::SymIntArrayRef sym_sizes() const {
-    return c10::SymIntArrayRef(reinterpret_cast<const SymInt*>(sizes().data()), sizes().size());
+    return impl_->sym_sizes();
   }
   IntArrayRef strides() const {
     return impl_->strides();
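
Why the delegation matters: sizes() on a lazy tensor yields concrete ints, so the old reinterpret_cast could only ever produce non-symbolic SymInts; routing through impl_->sym_sizes() lets LTCTensorImpl return genuinely symbolic dimensions. A hedged usage sketch, assuming a LazyTensor-enabled build (it mirrors the NarrowCopyViaSymSizes test added further down):

// Hypothetical usage; requires a LazyTensor-enabled build of PyTorch.
#include <torch/torch.h>

int main() {
  auto x = torch::rand({10}).to(at::kLazy);
  // With this change, sym_sizes() dispatches to LTCTensorImpl and each
  // element is a c10::SymInt backed by a size IR node, not a plain int.
  c10::SymInt s = x.sym_sizes()[0];
  return s.is_symbolic() ? 0 : 1;
}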

c10/core/SymInt.cpp

Lines changed: 5 additions & 3 deletions
@@ -7,12 +7,14 @@ namespace c10 {
 std::shared_ptr<SymbolicIntNode> SymInt::toSymbolicIntNode() {
   auto& st = getSymIntTable();
   TORCH_CHECK(is_symbolic());
-  return st.getNode(SymInt::SYM_TAG_MASK ^ static_cast<uint64_t>(data_));
+  return st.getNode(static_cast<uint64_t>(data_) & ~MASK);
 }

 c10::SymInt SymInt::toSymInt(std::shared_ptr<SymbolicIntNode> sin_sp) {
   auto& sit = getSymIntTable();
-  auto data = sit.addNode(sin_sp) | SYM_TAG_MASK;
-  return c10::SymInt(data);
+  uint64_t idx = sit.addNode(sin_sp);
+  TORCH_CHECK(idx < MAX_SYM_IDX, "SymbolicIntNode index overflow: ", idx);
+  uint64_t data = idx | IS_SYM;
+  return c10::SymInt(static_cast<int64_t>(data));
 }
 } // namespace c10
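
For intuition, the tag/untag arithmetic can be reproduced standalone. This sketch copies the constants from c10/core/SymInt.h (they are private there) and checks that `| IS_SYM` and `& ~MASK` are inverses for any index below MAX_SYM_IDX:

#include <cassert>
#include <cstdint>

// Copied from c10/core/SymInt.h for illustration only.
constexpr uint64_t MASK = 1ULL << 63 | 1ULL << 62; // top two bits
constexpr uint64_t IS_SYM = 1ULL << 63;            // the 0b10... tag
constexpr uint64_t MAX_SYM_IDX = 1ULL << 62;

int main() {
  const uint64_t idx = 42; // a SymIntTable slot, must be < MAX_SYM_IDX
  assert(idx < MAX_SYM_IDX);

  // toSymInt: tag the index (sets bit 63, leaves bit 62 clear -> 0b10...).
  const int64_t data = static_cast<int64_t>(idx | IS_SYM);

  // is_symbolic: the top two bits are exactly the 0b10... pattern.
  assert((static_cast<uint64_t>(data) & MASK) == IS_SYM);

  // toSymbolicIntNode: clear the top two bits to recover the index.
  assert((static_cast<uint64_t>(data) & ~MASK) == idx);
  return 0;
}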

c10/core/SymInt.h

Lines changed: 25 additions & 7 deletions
@@ -25,10 +25,6 @@ class SymbolicIntNode;
 // SymInt will be extenteded to represent a union structure Union[int64_t,
 // SymbolicIntNode*] which will be implemented as a single packed int64_t field
 // named data_.
-//
-// data_ can be either a plain int64_t or (1 << 63 | `index`). `index` points to
-// SymbolicIntNode* that will be responsible for constructing an IR node for
-// a traced operation to represent it in LTC or Fx graphs.
 class C10_API SymInt {
  public:
  explicit SymInt(int64_t d) : data_(d){};
@@ -39,8 +35,7 @@
   }

   bool is_symbolic() const {
-    return static_cast<uint64_t>(SYM_TAG_MASK) &
-        static_cast<uint64_t>(this->data_);
+    return (MASK & static_cast<uint64_t>(this->data_)) == IS_SYM;
   }

   bool operator==(const SymInt& p2) const {
@@ -62,8 +57,31 @@
     return data_;
   }

+  // Return whether the integer is representable as a SymInt.
+  static bool check_range(int64_t i) {
+    return i > MIN_INT;
+  }
+
  private:
-  const static int64_t SYM_TAG_MASK = 1LL << 63;
+  // Constraints on the internal representation:
+  // - Should represent positive and negative ints
+  // - No conversion necessary for operations on ints.
+  // - We reserve some values to act as indices into our sym int table.
+  //
+  // So, the scheme is to reserve large negative numbers:
+  // - 0b0.... means we are a positive int (following two's complement)
+  // - 0b11... means we are a negative int (following two's complement)
+  // - 0b10... means we are index into the sym table. This means that
+  //   [-2^63, -2^62-1] are not representable as ints.
+  static constexpr uint64_t MASK = 1ULL << 63 | 1ULL << 62;
+  static constexpr uint64_t IS_SYM = 1ULL << 63;
+  // Since we use the top two bits to determine whether something is symbolic,
+  // we cannot represent symbolic indices that are large enough to use those
+  // bits. This will probably never happen.
+  static constexpr uint64_t MAX_SYM_IDX = 1ULL << 62;
+  // Since 0b10... is reserved for symbolic indices, any integers lower than
+  // this value would collide with our representation.
+  static constexpr int64_t MIN_INT = -1LL & ~(1ULL << 62);
   int64_t data_;
 };
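
A standalone sketch of how the top-two-bit scheme partitions the int64_t range (constants copied from the class above; the free function mirrors SymInt::is_symbolic()):

#include <cassert>
#include <cstdint>

constexpr uint64_t MASK = 1ULL << 63 | 1ULL << 62;
constexpr uint64_t IS_SYM = 1ULL << 63;

// Mirrors SymInt::is_symbolic() on a raw int64_t payload.
bool is_symbolic(int64_t data) {
  return (MASK & static_cast<uint64_t>(data)) == IS_SYM;
}

int main() {
  assert(!is_symbolic(0));         // top bits 00: small positive int
  assert(!is_symbolic(INT64_MAX)); // top bits 01: still a positive int
  assert(!is_symbolic(-1));        // top bits 11: negative int
  assert(is_symbolic(INT64_MIN));  // top bits 10: reserved for sym indices
  return 0;
}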

c10/core/SymIntTable.cpp

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@

 namespace c10 {

-int64_t SymIntTable::addNode(std::shared_ptr<SymbolicIntNode> sin) {
+uint64_t SymIntTable::addNode(std::shared_ptr<SymbolicIntNode> sin) {
   std::lock_guard<std::mutex> lock(mutex_);
   auto index = nodes_.size();
   nodes_.push_back(sin);
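
The signature change from int64_t to uint64_t matches what the function actually returns: a vector index, which is never negative. A minimal sketch of the index-allocation pattern (TableLike is a hypothetical stand-in; the real SymIntTable stores std::shared_ptr<SymbolicIntNode> and also offers getNode(index)):

#include <cassert>
#include <cstdint>
#include <mutex>
#include <vector>

struct TableLike {
  uint64_t add(int value) {
    std::lock_guard<std::mutex> lock(mutex_);
    auto index = items_.size(); // slot we are about to fill
    items_.push_back(value);
    return index; // uint64_t: a table index is never negative
  }

 private:
  std::vector<int> items_;
  std::mutex mutex_;
};

int main() {
  TableLike t;
  assert(t.add(7) == 0);
  assert(t.add(9) == 1); // indices grow monotonically under the lock
  return 0;
}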

c10/core/SymbolicIntNode.h

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ class C10_API SymbolicIntNode

 class C10_API SymIntTable {
  public:
-  int64_t addNode(std::shared_ptr<SymbolicIntNode> sin);
+  uint64_t addNode(std::shared_ptr<SymbolicIntNode> sin);
   std::shared_ptr<SymbolicIntNode> getNode(size_t index);

 private:

c10/core/TensorImpl.cpp

Lines changed: 8 additions & 0 deletions
@@ -2,6 +2,7 @@

 #include <c10/core/Backend.h>
 #include <c10/core/InferenceMode.h>
+#include <c10/core/SymIntArrayRef.h>
 #include <c10/core/WrapDimMinimal.h>
 #include <c10/core/impl/LocalDispatchKeySet.h>
 #include <c10/util/Optional.h>
@@ -371,6 +372,13 @@ IntArrayRef TensorImpl::sizes_custom() const {
   TORCH_CHECK(
       false, "Tensors of type ", tensorimpl_type_name(), " do not have sizes");
 }
+c10::SymIntArrayRef TensorImpl::sym_sizes_custom() const {
+  TORCH_CHECK(
+      false,
+      "Tensors of type ",
+      tensorimpl_type_name(),
+      " do not have sym sizes");
+}
 IntArrayRef TensorImpl::strides_custom() const {
   TORCH_CHECK(
       false,

c10/core/TensorImpl.h

Lines changed: 15 additions & 0 deletions
@@ -548,6 +548,15 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
     return sizes_default();
   }

+  c10::SymIntArrayRef sym_sizes() const {
+    if (C10_UNLIKELY(
+            sizes_strides_policy_ >=
+            static_cast<uint8_t>(SizesStridesPolicy::CustomSizes))) {
+      return sym_sizes_custom();
+    }
+    return sym_sizes_default();
+  }
+
   /**
    * Return a reference to the strides of this tensor. This reference remains
    * valid as long as the tensor is live and not restrided.
@@ -655,6 +664,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
   virtual bool is_contiguous_custom(at::MemoryFormat memory_format) const;
   // sizes_strides_policy_ >= CustomSizes
   virtual IntArrayRef sizes_custom() const;
+  virtual c10::SymIntArrayRef sym_sizes_custom() const;
   virtual int64_t dim_custom() const;
   virtual int64_t numel_custom() const;

@@ -675,6 +685,11 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
   inline IntArrayRef sizes_default() const {
     return sizes_and_strides_.sizes_arrayref();
   }
+  inline c10::SymIntArrayRef sym_sizes_default() const {
+    return c10::SymIntArrayRef(
+        reinterpret_cast<const c10::SymInt*>(sizes_and_strides_.sizes_data()),
+        sizes_and_strides_.size());
+  }
   inline int64_t dim_default() const {
     return sizes_and_strides_.size();
   }
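
sym_sizes() follows the same default/custom split as the existing sizes() accessor: a cheap inline policy check on the hot path, with a virtual *_custom() hook for opted-in subclasses such as LTCTensorImpl. A simplified, hypothetical model of the pattern (Impl, LazyImpl, and Policy are stand-ins, not PyTorch types):

#include <cstdint>
#include <iostream>

struct Impl {
  enum class Policy : uint8_t { Default = 0, CustomSizes = 2 };

  int64_t dim() const {
    // Hot path: one byte compare, no virtual call for ordinary tensors.
    if (policy_ >= Policy::CustomSizes) {
      return dim_custom();
    }
    return dim_default();
  }
  virtual int64_t dim_custom() const { return dim_default(); }
  int64_t dim_default() const { return 4; }
  virtual ~Impl() = default;

  Policy policy_ = Policy::Default;
};

struct LazyImpl : Impl {
  LazyImpl() { policy_ = Policy::CustomSizes; }
  int64_t dim_custom() const override { return 2; } // backend-specific answer
};

int main() {
  Impl a;
  LazyImpl b;
  std::cout << a.dim() << " " << b.dim() << "\n"; // prints: 4 2
  return 0;
}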

c10/test/core/SymInt_test.cpp

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+#include <gtest/gtest.h>
+
+#include <c10/core/SymInt.h>
+#include <c10/core/SymbolicIntNode.h>
+
+using namespace c10;
+
+void check(int64_t value) {
+  EXPECT_TRUE(SymInt::check_range(value));
+  const auto i = SymInt(value);
+  EXPECT_FALSE(i.is_symbolic());
+  EXPECT_EQ(i.data(), value);
+}
+
+TEST(SymIntTest, ConcreteInts) {
+  check(INT64_MAX);
+  check(0);
+  check(-1);
+  // This is 2^62, which is the most negative number we can support.
+  check(-4611686018427387904LL);
+}
+
+TEST(SymIntTest, AddNode) {
+  auto n = std::make_shared<SymbolicIntNode>();
+  auto i = n->toSymInt();
+  EXPECT_TRUE(i.is_symbolic());
+}
+
+TEST(SymIntTest, CheckRange) {
+  EXPECT_FALSE(SymInt::check_range(INT64_MIN));
+}

caffe2/core/tensor.h

Lines changed: 1 addition & 2 deletions
@@ -430,8 +430,7 @@ class TORCH_API Tensor final {
   }

   inline c10::SymIntArrayRef sym_sizes() const {
-    auto sizes = impl_.get()->sizes();
-    return c10::SymIntArrayRef(reinterpret_cast<const c10::SymInt*>(sizes.data()), sizes.size());
+    return impl_->sym_sizes();
   }

   inline int64_t size_from_dim(int k) const {

test/cpp/lazy/test_lazy_ops.cpp

Lines changed: 13 additions & 0 deletions
@@ -94,6 +94,19 @@ TEST(LazyDynamicOpsTest, NarrowCopy) {
   AllClose(z.cpu(), x.cpu().narrow_copy(X_DIM_INDEX, 0, Y_DIM));
 }

+TEST(LazyDynamicOpsTest, NarrowCopyViaSymSizes) {
+  auto xc = torch::rand({10});
+  auto x = xc.to(kLazy);
+  const size_t Y_DIM = 3;
+  const size_t X_DIM_INDEX = 0;
+  auto y = torch::rand({Y_DIM}).to(kLazy);
+  auto z = x.narrow_copy(X_DIM_INDEX, 0, y.sym_sizes()[0]);
+  auto zc = xc.narrow_copy(X_DIM_INDEX, 0, Y_DIM);
+  ASSERT_EQ(z.sizes()[0], xc.sizes()[0]); // note, xc not zc
+  // shape inference assumes narrow_copy can copy the whole tensor
+  AllClose(z.cpu(), zc);
+}
+
 TEST_F(LazyOpsTest, TestScalarTensor) {
   torch::Tensor scalar_tensor = torch::scalar_tensor(
       1., torch::TensorOptions(torch::kFloat).device(DefaultDevice()));

test/lazy/test_reuse_ir.py

Lines changed: 3 additions & 4 deletions
@@ -37,7 +37,7 @@ def testAdd(self):
         torch._lazy.mark_step()

         torch.testing.assert_close(z.cpu(), z_lazy.cpu())
-        assert metrics.counter_value("IrNodeReused_torch::lazy::AddTensor") >= 16
+        assert metrics.counter_value("IrNodeReused_torch::lazy::AddTensor") >= 14
         metrics.reset()
         torch._lazy.ir_cache.reset()

@@ -66,8 +66,7 @@ def testAddSub(self):
         torch._lazy.mark_step()

         torch.testing.assert_close(z.cpu(), z_lazy.cpu())
-        assert metrics.counter_value("IrNodeReused_torch::lazy::AddTensor") >= 10
-        assert metrics.counter_value("IrNodeReused_torch::lazy::AddTensor") >= 4
+        assert metrics.counter_value("IrNodeReused_torch::lazy::AddTensor") >= 8
         metrics.reset()
         torch._lazy.ir_cache.reset()

@@ -97,7 +96,7 @@ def testAddSubFallback(self):
         torch._lazy.mark_step()

         torch.testing.assert_close(z.cpu(), z_lazy.cpu())
-        assert metrics.counter_value("IrNodeReused_torch::lazy::AddTensor") >= 11
+        assert metrics.counter_value("IrNodeReused_torch::lazy::AddTensor") >= 8
         metrics.reset()
         torch._lazy.ir_cache.reset()
         torch._lazy.config.set_force_fallback("")

torch/__init__.py

Lines changed: 8 additions & 5 deletions
@@ -817,11 +817,6 @@ def _assert(condition, message):
 from torch import fft as fft
 from torch import futures as futures
 from torch import nn as nn
-import torch.nn.intrinsic
-import torch.nn.quantizable
-import torch.nn.quantized
-# AO depends on nn, as well as quantized stuff -- so should be after those.
-from torch import ao as ao
 from torch import optim as optim
 import torch.optim._multi_tensor
 from torch import multiprocessing as multiprocessing
@@ -847,6 +842,14 @@ def _assert(condition, message):
 from torch import __future__ as __future__
 from torch import profiler as profiler

+# Quantized, sparse, AO, etc. should be last to get imported, as nothing
+# is expected to depend on them.
+import torch.nn.intrinsic
+import torch.nn.quantizable
+import torch.nn.quantized
+# AO depends on nn, as well as quantized stuff -- so should be after those.
+from torch import ao as ao
+
 _C._init_names(list(torch._storage_classes))

 # attach docstrings to torch and tensor functions

torch/csrc/lazy/core/tensor_impl.cpp

Lines changed: 13 additions & 0 deletions
@@ -6,6 +6,7 @@
 #include <c10/macros/Macros.h>
 #include <c10/util/irange.h>
 #include <torch/csrc/lazy/core/tensor_util.h>
+#include <torch/csrc/lazy/core/ir_builder.h>

 namespace torch {
 namespace lazy {
@@ -83,6 +84,14 @@ LTCTensorImpl::LTCTensorImpl(LazyTensor&& tensor)
   // according to https://github.com/pytorch/xla/pull/2682.
   is_non_overlapping_and_dense_ = false;
   set_sizes_strides_policy(SizesStridesPolicy::CustomSizes);
+
+  auto rank = tensor_->shape().Get().sizes().size();
+  sym_sizes_.reserve(rank);
+  for (auto i : c10::irange(rank)) {
+    auto dim_node = getBackend()->GetIrBuilder()->MakeSizeNode(this->tensor_->GetIrValue(), i);
+    auto sn = std::make_shared<torch::lazy::SymbolicIntNode>(dim_node);
+    sym_sizes_.push_back(sn->toSymInt());
+  }
 }

 void LTCTensorImpl::set_tensor(const LazyTensorPtr& lazy_tensor) {
@@ -127,6 +136,10 @@ void LTCTensorImpl::shallow_copy_from(
   generation_ = 0;
 }

+c10::SymIntArrayRef LTCTensorImpl::sym_sizes_custom() const {
+  return c10::SymIntArrayRef(sym_sizes_.data(), sym_sizes_.size());
+}
+
 void LTCTensorImpl::setup_size_properties() {
   size_t generation = tensor_->generation();
   if (generation != generation_) {
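
One detail worth noting: sym_sizes_custom() hands out a non-owning c10::SymIntArrayRef over sym_sizes_, which is why the vector is built once in the constructor and kept alive on the impl; the view must not outlive its backing storage. A minimal sketch of that view-over-cached-storage pattern (ArrayRefLike and ImplLike are hypothetical stand-ins):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Stand-in for c10::ArrayRef<T>: a non-owning (pointer, length) view.
template <typename T>
struct ArrayRefLike {
  const T* data;
  size_t len;
};

struct ImplLike {
  // Populated once at construction; must outlive any view handed out.
  std::vector<int64_t> cached_sizes{10, 3};

  ArrayRefLike<int64_t> sizes_view() const {
    return {cached_sizes.data(), cached_sizes.size()};
  }
};

int main() {
  ImplLike impl;
  auto view = impl.sizes_view();
  assert(view.len == 2 && view.data[0] == 10);
  return 0;
}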

torch/csrc/lazy/core/tensor_impl.h

Lines changed: 4 additions & 0 deletions
@@ -1,6 +1,7 @@
 #pragma once

 #include <ATen/Tensor.h>
+#include <c10/core/SymIntArrayRef.h>
 #include <c10/core/TensorImpl.h>

 #include <torch/csrc/lazy/core/tensor.h>
@@ -38,6 +39,8 @@ class TORCH_API LTCTensorImpl final : public c10::TensorImpl {
   int64_t numel_custom() const override;
   bool is_contiguous_custom(at::MemoryFormat memory_format) const override;

+  virtual c10::SymIntArrayRef sym_sizes_custom() const override;
+
 #ifndef C10_DISABLE_TENSORIMPL_EXTENSIBILITY
   const at::Storage& storage() const override { return tensor_->Storage(); }
   bool has_storage() const override { return tensor_->Storage(); }
@@ -47,6 +50,7 @@ class TORCH_API LTCTensorImpl final : public c10::TensorImpl {
   void setup_size_properties();

   LazyTensorPtr tensor_;
+  std::vector<c10::SymInt> sym_sizes_;
   size_t generation_ {0};
 };

0 commit comments