[Array API] Add linalg.vecdot by lezcano · Pull Request #70542 · pytorch/pytorch

Status: Closed. Wants to merge 14 commits.

33 changes: 33 additions & 0 deletions aten/src/ATen/native/BatchLinearAlgebra.cpp
@@ -4102,6 +4102,39 @@ TORCH_IMPL_FUNC(linalg_ldl_solve_out)

// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ solve_triangular ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Tensor& linalg_vecdot_out(const Tensor& x, const Tensor& y, int64_t dim, Tensor& out) {
  checkFloatingOrComplex(x, "linalg.vecdot");
  TORCH_CHECK(x.scalar_type() == y.scalar_type(),
              "linalg.vecdot: Expected x and y to have the same dtype, but found x of type ",
              x.scalar_type(), " and y of type ", y.scalar_type(), " instead");
  // out checks
  TORCH_CHECK(out.scalar_type() == x.scalar_type(),
              "linalg.vecdot: Expected out of dtype ", x.scalar_type(),
              " but found ", out.scalar_type());
  checkSameDevice("linalg.vecdot", x, out);

  // Computes x^H y
  if (x.dim() == 1 && y.dim() == 1) {
    at::native::resize_output(out, {});
    return at::vdot_out(out, x, y);
  } else {
    return at::sum_out(out, x.conj() * y, /*dim=*/dim);
  }
}

Tensor linalg_vecdot(const Tensor& x, const Tensor& y, int64_t dim) {
  checkFloatingOrComplex(x, "linalg.vecdot");
  TORCH_CHECK(x.scalar_type() == y.scalar_type(),
              "linalg.vecdot: Expected x and y to have the same dtype, but found x of type ",
              x.scalar_type(), " and y of type ", y.scalar_type(), " instead");
  // Computes x^H y
  if (x.dim() == 1 && y.dim() == 1) {
    return at::vdot(x, y);
  } else {
    return x.conj().mul(y).sum(/*dim=*/dim);
  }
}
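
A minimal Python sketch of the semantics implemented by the two functions above, for readers skimming the diff. It is illustrative only (the PR ships the C++ paths), and vecdot_reference is a hypothetical name, not part of the change:

    import torch

    def vecdot_reference(x, y, dim=-1):
        # Mirrors the branch structure above: 1-D inputs dispatch to vdot,
        # which already conjugates its first argument (x^H y).
        if x.dim() == 1 and y.dim() == 1:
            return torch.vdot(x, y)
        # Batched case: conjugate x, multiply elementwise, reduce along `dim`.
        return (x.conj() * y).sum(dim=dim)

    a = torch.randn(3, dtype=torch.cfloat)
    b = torch.randn(3, dtype=torch.cfloat)
    print(torch.allclose(vecdot_reference(a, b), torch.vdot(a, b)))  # expected: True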

/*
Solves the matrix equation AX = B for A triangular.
'left' If true solves AX = B, if false solves XA = B
7 changes: 7 additions & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -11612,6 +11612,13 @@
- func: linalg_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
  python_module: linalg

- func: linalg_vecdot(Tensor x, Tensor y, *, int dim=-1) -> Tensor
  python_module: linalg
  variants: function

- func: linalg_vecdot.out(Tensor x, Tensor y, *, int dim=-1, Tensor(a!) out) -> Tensor(a!)
  python_module: linalg

- func: linalg_matrix_exp(Tensor self) -> Tensor
  python_module: linalg
  variants: function
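
The two new schema entries above generate the functional and out= variants exposed under torch.linalg. A quick usage sketch under that assumption (shapes and values are illustrative):

    import torch

    x = torch.randn(2, 4)
    y = torch.randn(2, 4)

    z = torch.linalg.vecdot(x, y, dim=-1)       # functional variant: linalg_vecdot
    out = torch.empty(2)
    torch.linalg.vecdot(x, y, dim=-1, out=out)  # out= variant: linalg_vecdot.out
    print(torch.allclose(z, out))               # expected: True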
1 change: 1 addition & 0 deletions docs/source/linalg.rst
@@ -84,6 +84,7 @@ Matrix Products

cross
matmul
vecdot
multi_dot
householder_product

1 change: 1 addition & 0 deletions test/allowlist_for_publicAPI.json
@@ -1148,6 +1148,7 @@
"tensorinv",
"tensorsolve",
"vander",
"vecdot",
"vector_norm"
],
"torch.multiprocessing": [
1 change: 1 addition & 0 deletions test/test_meta.py
@@ -497,6 +497,7 @@ def run_meta_crossref(
    torch.nanmean: {bf16, f16, f32, f64},  # TODO(chilli): Doesn't seem to work for some reason?
    torch.nn.functional.nll_loss: {bf16, f32, f64},  # TODO
    torch.linalg.pinv: {f32, f64},
    torch.linalg.vecdot: {f16, bf16, f32, f64},  # aten::prod
    torch.empty: {b8, bf16, c128, c64, c32, f16, f32, f64, i16, i32, i64, i8, u8},
}

23 changes: 18 additions & 5 deletions torch/_torch_docs.py
@@ -3513,21 +3513,34 @@ def merge_dicts(*dicts):
r"""
vdot(input, other, *, out=None) -> Tensor

Computes the dot product of two 1D tensors. The vdot(a, b) function handles complex numbers
differently than dot(a, b). If the first argument is complex, the complex conjugate of the
first argument is used for the calculation of the dot product.
Computes the dot product of two 1D tensors.

In symbols, this function computes

.. math::

    \sum_{i=1}^n \overline{x_i}y_i,

where :math:`\overline{x_i}` denotes the conjugate for complex
vectors, and it is the identity for real vectors.

.. note::

Unlike NumPy's vdot, torch.vdot intentionally only supports computing the dot product
of two 1D tensors with the same number of elements.

.. seealso::

:func:`torch.linalg.vecdot` computes the dot product of two batches of vectors along a dimension.

Args:
    input (Tensor): first tensor in the dot product, must be 1D. Its conjugate is used if it's complex.
    other (Tensor): second tensor in the dot product, must be 1D.

Keyword args:
    {out}
""" + fr"""
.. note:: {common_args["out"]}
""" + r"""

Example::

@@ -3539,7 +3552,7 @@ def merge_dicts(*dicts):
tensor([16.+1.j])
>>> torch.vdot(b, a)
tensor([16.-1.j])
""".format(**common_args))
""")

add_docstr(torch.eig,
r"""
35 changes: 35 additions & 0 deletions torch/linalg/__init__.py
@@ -2801,3 +2801,38 @@
[ 1, 3, 9],
[ 1, 5, 25]])
""")

vecdot = _add_docstr(_linalg.linalg_vecdot, r"""
linalg.vecdot(x, y, *, dim=-1, out=None) -> Tensor

Computes the dot product of two batches of vectors along a dimension.

In symbols, this function computes

.. math::

    \sum_{i=1}^n \overline{x_i}y_i

over the dimension :attr:`dim` where :math:`\overline{x_i}` denotes the conjugate for complex
vectors, and it is the identity for real vectors.

Supports input of half, bfloat16, float, double, cfloat, cdouble and integral dtypes.

    Review thread on the line above:

    Collaborator: If it supports everything do we need to mention it?

    Reply: I think it's fine to be explicit here. Also, many other linalg functions have a note on supported dtypes.

    Collaborator (author): Leaving it as "better explicit than implicit". Also, it does support bool, but it's not announced here. If I had to choose I'd disallow integral and boolean types, but left them because why not.

It also supports broadcasting.

Args:
    x (Tensor): first batch of vectors of shape `(*, n)`.
    y (Tensor): second batch of vectors of shape `(*, n)`.

Keyword args:
    dim (int): Dimension along which to compute the dot product. Default: `-1`.
    out (Tensor, optional): output tensor. Ignored if `None`. Default: `None`.

Examples::

    >>> v1 = torch.randn(3, 2)
    >>> v2 = torch.randn(3, 2)
    >>> linalg.vecdot(v1, v2)
    tensor([ 0.3223, 0.2815, -0.1944])
    >>> torch.vdot(v1[0], v2[0])
    tensor(0.3223)
""")
1 change: 1 addition & 0 deletions torch/overrides.py
@@ -923,6 +923,7 @@ def get_testing_overrides() -> Dict[Callable, Callable]:
        torch.ravel: lambda input: -1,
        torch.real: lambda input, out=None: -1,
        torch.vdot: lambda input, other, out=None: -1,
        torch.linalg.vecdot: lambda input, other, dim=-1, out=None: -1,
        torch.view_as_real: lambda input: -1,
        torch.view_as_complex: lambda input: -1,
        torch.reciprocal: lambda input, out=None: -1,
23 changes: 22 additions & 1 deletion torch/testing/_internal/common_methods_invocations.py
@@ -1351,7 +1351,7 @@ class ReductionOpInfo(OpInfo):
the optional keyword parameters of the ReductionOpInfo constructor.

If a reduction operator does not yet implement the full required API of
reduction operators, this should be documented by skipping the failing
reduction operators, this should be documented by xfailing the failing
tests rather than adding optional parameters to ReductionOpInfo.

NOTE
@@ -3045,6 +3045,17 @@ def error_inputs_isclose(op, device, **kwargs):
error_regex='atol must be greater than or equal to zero')


def sample_inputs_linalg_vecdot(op_info, device, dtype, requires_grad, **kwargs):
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    batches = ((), (0,), (1,), (5,))
    ns = (0, 1, 3, 5)
    for b, n in product(batches, ns):
        shape = b + (n,)
        yield SampleInput(make_arg(shape), args=(make_arg(shape),))
        for i in range(len(shape)):
            yield SampleInput(make_arg(shape), args=(make_arg(shape),), kwargs=dict(dim=i))


def sample_inputs_t(op_info, device, dtype, requires_grad, **kwargs):
    make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
    return (SampleInput(make_arg((1, 2))),
@@ -12464,6 +12475,16 @@ def error_inputs_mean(op_info, device, **kwargs):
           gradcheck_wrapper=gradcheck_wrapper_hermitian_input,
           decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCPUIfNoLapack],
           ),
    OpInfo('linalg.vecdot',
           aten_name='linalg_vecdot',
           ref=lambda x, y, *, dim=-1: (x.conj() * y).sum(dim),
           dtypes=floating_and_complex_types_and(torch.bfloat16),
           dtypesIfCUDA=floating_and_complex_types_and(torch.half,
                                                       *[torch.bfloat16] if (CUDA11OrLater or TEST_WITH_ROCM) else []),
           sample_inputs_func=sample_inputs_linalg_vecdot,
           check_batched_forward_grad=False,
           supports_forward_ad=True,
           supports_fwgrad_bwgrad=True),
    OpInfo('linalg.cond',
           aten_name='linalg_cond',
           dtypes=floating_and_complex_types(),