[inductor] weight prepack for _convolution_transpose_pointwise by chunyuan-w · Pull Request #90266 · pytorch/pytorch · GitHub

[inductor] weight prepack for _convolution_transpose_pointwise #90266


Closed
wants to merge 19 commits into from
Changes from 1 commit

Commits (19)
3a3fed6
[inductor] weight prepack for _convolution_transpose_pointwise
chunyuan-w Dec 6, 2022
4b3185c
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 6, 2022
0b90c29
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 6, 2022
a215137
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 6, 2022
6082d2e
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 8, 2022
99b142d
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 8, 2022
0335b8a
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 9, 2022
f7785a1
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 9, 2022
cb2baa5
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 12, 2022
455c405
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 12, 2022
6332bf9
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 13, 2022
29f422b
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 13, 2022
2af66a4
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 15, 2022
762de3e
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 15, 2022
84fe700
Update on "[inductor] weight prepac 8000 k for _convolution_transpose_point…
chunyuan-w Dec 15, 2022
2600a85
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 15, 2022
c63866c
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 15, 2022
1e6b9bb
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 19, 2022
7ccfae3
Update on "[inductor] weight prepack for _convolution_transpose_point…
chunyuan-w Dec 19, 2022
Update on "[inductor] weight prepack for _convolution_transpose_point…
…wise"

cc VitalyFedyunin jgong5 mingfeima XiaobingSuper sanchitintel ashokei jingxu10 mlazos soumith voznesenskym yanboliang penguinwu anijain2305 EikanWang Guobing-Chen zhuhaozhe blzheng Xia-Weiwen wenzhe-nrv jiayisunx peterbell10 desertfire

[ghstack-poisoned]
chunyuan-w committed Dec 9, 2022
commit 0335b8acfb9f45b40c031e51dc77f0c4947229c3
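
For context before reading the diff: below is a minimal, hypothetical sketch (not part of this PR) of the kind of module the optimization targets — a ConvTranspose2d followed by a pointwise op, compiled with the inductor backend on CPU so that the transposed-convolution weight can be prepacked into oneDNN's blocked format at compile time. Whether prepacking and fusion actually apply depends on the build (oneDNN CPU support) and the inductor configuration.

import torch

class DeconvReLU(torch.nn.Module):
    # ConvTranspose2d + pointwise op (ReLU): the "_convolution_transpose_pointwise"
    # pattern that this PR adds weight prepacking for.
    def __init__(self):
        super().__init__()
        self.deconv = torch.nn.ConvTranspose2d(3, 16, kernel_size=3, stride=2)

    def forward(self, x):
        return torch.relu(self.deconv(x))

mod = DeconvReLU().eval()
x = torch.randn(1, 3, 8, 8)
with torch.no_grad():
    compiled = torch.compile(mod, backend="inductor")
    print(compiled(x).shape)  # torch.Size([1, 16, 17, 17])
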
111 changes: 37 additions & 74 deletions torch/_inductor/ir.py
@@ -3349,71 +3349,6 @@ def get_template_tiling(self):
)


def _prepare_convolution_fusion_create(
cls,
x: "TensorBox",
weight: "TensorBox",
bias: "TensorBox",
padding_: List[int],
stride_: List[int],
dilation_: List[int],
groups: int,
transposed: bool = False,
output_padding_: List[int] = None,
):
"""
This function is a helper function to prepare inputs, layout and constant args
for convolution post-op fusion's create function, including deciding the output
layout (channels first or channels last), realizing inputs and make them etc. The
function only supports the CPU device since conv post-op fusion kernel is only
supported on CPU right now.
"""
stride = tuple(stride_)
padding = tuple(padding_)
dilation = tuple(dilation_)
assert isinstance(groups, int)
output_padding = tuple(output_padding_) if output_padding_ else (0, 0)
with torch._subclasses.FakeTensorMode():
x_fake = ir_node_to_tensor(x, guard_shape=True)
weight_fake = ir_node_to_tensor(weight, guard_shape=True)
bias_fake = (
ir_node_to_tensor(bias, guard_shape=True) if bias is not None else bias
)
output = torch.ops.aten.convolution(
x_fake,
weight_fake,
bias_fake,
stride,
padding,
dilation,
False,
[0, 0],
groups,
)
output_size = output.size()
req_stride_order = [0] + list(reversed(range(1, len(stride) + 1)))
req_stride_order = [len(req_stride_order)] + req_stride_order
output_stride = make_channels_last_strides_for(output_size)

x = cls.require_stride_order(x, req_stride_order)
assert x.get_device().type == "cpu" and weight.get_device().type == "cpu"
inputs = [x, weight]

kernel_layout = FixedLayout(
x.get_device(),
x.get_dtype(),
output.size(),
output_stride,
)
constant_args = [padding, stride, dilation, groups]

if bias is not None:
inputs.append(bias)
else:
constant_args.insert(0, bias)
return inputs, constant_args, kernel_layout, req_stride_order


# Port from aten/src/ATen/native/ConvUtils.h: _conv_input_size
def _conv_input_size(
output_size, weight_size, padding, output_padding, stride, dilation, groups
@@ -3461,7 +3396,7 @@ def _original_deconv_weight_size(
return weight_size


def _prepare_convolution_transpose_fusion_create(
def _prepare_convolution_fusion_create(
cls,
x: "TensorBox",
weight: "TensorBox",
@@ -3470,11 +3405,12 @@ def _prepare_convolution_transpose_fusion_create(
stride_: List[int],
dilation_: List[int],
groups: int,
output_padding_: List[int],
transposed: bool = False,
output_padding_: List[int] = None,
):
"""
This function is a helper function to prepare inputs, layout and constant args
for convolution_transpose post-op fusion's create function, including deciding the output
for convolution post-op fusion's create function, including deciding the output
layout (channels first or channels last), realizing inputs and make them etc. The
function only supports the CPU device since conv post-op fusion kernel is only
supported on CPU right now.
@@ -3483,20 +3419,47 @@
padding = tuple(padding_)
dilation = tuple(dilation_)
assert isinstance(groups, int)
output_padding = tuple(output_padding_)
output_padding = tuple(output_padding_) if output_padding_ else (0, 0)
with torch._subclasses.FakeTensorMode():
x_fake = ir_node_to_tensor(x, guard_shape=True)
weight_fake = ir_node_to_tensor(weight, guard_shape=True)

weight_size = _original_deconv_weight_size(weight_fake, groups)
input_size = x_fake.size()
output_size = _conv_input_size(
input_size, weight_size, padding, output_padding, stride, dilation, groups
)
if transposed:
# When transposed, the size of the prepacked oneDNN weight is different
# from the PyTorch weight. We're not able to run aten conv with such
# size. We infer the output size from the input params here:
weight_size = _original_deconv_weight_size(weight_fake, groups)
input_size = x_fake.size()
output_size = _conv_input_size(
input_size,
weight_size,
padding,
output_padding,
stride,
dilation,
groups,
)
else:
bias_fake = (
ir_node_to_tensor(bias, guard_shape=True) if bias is not None else bias
)
output = torch.ops.aten.convolution(
x_fake,
weight_fake,
bias_fake,
stride,
padding,
dilation,
transposed,
output_padding,
groups,
)
output_size = output.size()

req_stride_order = [0] + list(reversed(range(1, len(stride) + 1)))
req_stride_order = [len(req_stride_order)] + req_stride_order
output_stride = make_channels_last_strides_for(output_size)

x = cls.require_stride_order(x, req_stride_order)
assert x.get_device().type == "cpu" and weight.get_device().type == "cpu"
inputs = [x, weight]
You are viewing a condensed version of this merge commit.
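
A note on the transposed branch added above: the output size cannot be obtained by running aten.convolution on fake tensors, because the prepacked oneDNN weight has a different shape than the PyTorch weight (as the inline comment explains), so the size is inferred from the input parameters instead. The sketch below is a standalone illustration (my own naming, not the PR's code) of the computation `_conv_input_size` performs, following the formula ported from aten/src/ATen/native/ConvUtils.h; it assumes the weight is in the original PyTorch deconv layout [C_in, C_out // groups, kH, kW] as recovered by `_original_deconv_weight_size`.

from typing import List

def conv_transpose_output_size(
    input_size: List[int],    # deconv input, e.g. [N, C_in, H, W]
    weight_size: List[int],   # original deconv weight, [C_in, C_out // groups, kH, kW]
    padding: List[int],
    output_padding: List[int],
    stride: List[int],
    dilation: List[int],
    groups: int,
) -> List[int]:
    dim = len(input_size)
    output_size = [0] * dim
    output_size[0] = input_size[0]            # batch size is unchanged
    output_size[1] = weight_size[1] * groups  # deconv output channels
    for d in range(2, dim):
        kernel = dilation[d - 2] * (weight_size[d] - 1) + 1
        output_size[d] = (
            (input_size[d] - 1) * stride[d - 2]
            - 2 * padding[d - 2]
            + kernel
            + output_padding[d - 2]
        )
    return output_size

# [1, 3, 8, 8] input, 3 -> 16 channels, 3x3 kernel, stride 2 -> [1, 16, 17, 17]
print(conv_transpose_output_size([1, 3, 8, 8], [3, 16, 3, 3], [0, 0], [0, 0], [2, 2], [1, 1], 1))

The output size of a transposed convolution equals the input size of the matching forward convolution, which is why the helper ported from ConvUtils.h is called `_conv_input_size`.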
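
The `req_stride_order` computed at the end of the helper encodes a channels-last ordering. A small illustration of what the two expressions evaluate to for a 2-D convolution, assuming inductor's convention that entry i gives the rank of dimension i's stride, from smallest to largest:

stride = (1, 1)  # any 2-D conv stride; only its length matters here
req_stride_order = [0] + list(reversed(range(1, len(stride) + 1)))  # [0, 2, 1]
req_stride_order = [len(req_stride_order)] + req_stride_order       # [3, 0, 2, 1]
print(req_stride_order)

Here [3, 0, 2, 1] means dim 0 (N) has the largest stride and dim 1 (C) the smallest, i.e. NHWC/channels-last layout, consistent with the output strides produced by `make_channels_last_strides_for`.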