[Array API] Add linalg.vecdot · pytorch/pytorch@cc40d5e · GitHub
Commit cc40d5e
[Array API] Add linalg.vecdot
This PR adds the function `linalg.vecdot` specified by the [Array API](https://data-apis.org/array-api/latest/API_specification/linear_algebra_functions.html#function-vecdot). For the complex case, it chooses to implement \sum x_i y_i. See the discussion in data-apis/array-api#356.

ghstack-source-id: e51aaed
Pull Request resolved: #70542
1 parent ce86881 commit cc40d5e
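
As a quick sanity check of what the new function computes, here is a hedged sketch (the shapes and complex dtype are illustrative only); the equivalence follows the OpInfo `ref` lambda and the new docstring added further down in this commit:

>>> import torch
>>> x = torch.randn(3, 4, dtype=torch.cfloat)
>>> y = torch.randn(3, 4, dtype=torch.cfloat)
>>> expected = (x.conj() * y).sum(dim=-1)   # conjugate-then-sum, as in the OpInfo ref below
>>> torch.allclose(torch.linalg.vecdot(x, y, dim=-1), expected)
True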

File tree

8 files changed: +103 −7 lines changed


aten/src/ATen/core/interned_strings.h

Lines changed: 1 addition & 0 deletions
@@ -218,6 +218,7 @@ namespace c10 {
 _(aten, mH) \
 _(aten, linalg_matrix_power) \
 _(aten, chain_matmul) \
+_(aten, linalg_vecdot) \
 _(aten, linalg_multi_dot) \
 _(aten, linalg_norm) \
 _(aten, linalg_vector_norm) \

aten/src/ATen/native/BatchLinearAlgebra.cpp

Lines changed: 19 additions & 0 deletions
@@ -3836,6 +3836,25 @@ TransposeType to_transpose_type(const bool contig, const bool conj) {
 }
 } // end of anonymous namespace
 
+Tensor& linalg_vecdot_out(const Tensor& x, const Tensor& y, int64_t dim, Tensor& out) {
+  // Computes x^H y
+  if (x.dim() == 1 && y.dim() == 1) {
+    at::native::resize_output(out, {});
+    return at::vdot_out(out, x, y);
+  } else {
+    return at::sum_out(out, x.conj() * y, /*dim=*/dim);
+  }
+}
+
+Tensor linalg_vecdot(const Tensor& x, const Tensor& y, int64_t dim) {
+  // Computes x^H y
+  if (x.dim() == 1 && y.dim() == 1) {
+    return at::vdot(x, y);
+  } else {
+    return x.conj().mul(y).sum(/*dim=*/dim);
+  }
+}
+
 /*
 Solves the matrix equation AX = B for A triangular.
 'left' If true solves AX = B, if false solves XA = B
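
A rough Python-level reading of this composite kernel, assuming the two branches behave exactly as written above (1-D inputs route to `torch.vdot`, anything batched falls back to a conjugated multiply followed by a sum over `dim`); `vecdot_reference` is a hypothetical helper name, not part of the commit:

import torch

def vecdot_reference(x, y, dim=-1):
    # Mirrors linalg_vecdot above: vdot for plain 1-D vectors,
    # otherwise conjugate x, multiply elementwise, and reduce over `dim`.
    if x.dim() == 1 and y.dim() == 1:
        return torch.vdot(x, y)
    return (x.conj() * y).sum(dim=dim)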

aten/src/ATen/native/native_functions.yaml

Lines changed: 7 additions & 0 deletions
@@ -10844,6 +10844,13 @@
 - func: linalg_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
 
+- func: linalg_vecdot(Tensor x, Tensor y, *, int dim=-1) -> Tensor
+  python_module: linalg
+  variants: function
+
+- func: linalg_vecdot.out(Tensor x, Tensor y, *, int dim=-1, Tensor(a!) out) -> Tensor(a!)
+  python_module: linalg
+
 - func: linalg_matrix_exp(Tensor self) -> Tensor
   python_module: linalg
   variants: function
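
These schema entries register both a functional and an `out=` overload under the `linalg` Python module. A hedged usage sketch of the `out=` form (input shapes are arbitrary; note the `test_out` xfail in the OpInfo further down, which tracks a `torch.sum(out=)` issue):

>>> import torch
>>> x = torch.randn(2, 3)
>>> y = torch.randn(2, 3)
>>> out = torch.empty(2)   # shape left after reducing over dim=-1
>>> _ = torch.linalg.vecdot(x, y, dim=-1, out=out)
>>> out.shape
torch.Size([2])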

docs/source/linalg.rst

Lines changed: 1 addition & 0 deletions
@@ -80,6 +80,7 @@ Matrix Products
 
     cross
     matmul
+    vecdot
     multi_dot
     householder_product

torch/_torch_docs.py

Lines changed: 18 additions & 5 deletions
@@ -3429,21 +3429,34 @@ def merge_dicts(*dicts):
 r"""
 vdot(input, other, *, out=None) -> Tensor
 
-Computes the dot product of two 1D tensors. The vdot(a, b) function handles complex numbers
-differently than dot(a, b). If the first argument is complex, the complex conjugate of the
-first argument is used for the calculation of the dot product.
+Computes the dot product of two 1D vectors along a dimension.
+
+In symbols, this function computes
+
+.. math::
+
+    \sum_{i=1}^n \overline{x_i}y_i.
+
+where :math:`\overline{x_i}` denotes the conjugate for complex
+vectors, and it is the identity for real vectors.
 
 .. note::
 
     Unlike NumPy's vdot, torch.vdot intentionally only supports computing the dot product
     of two 1D tensors with the same number of elements.
 
+.. seealso::
+
+    :func:`torch.linalg.vecdot` computes the dot product of two batches of vectors along a dimension.
+
 Args:
     input (Tensor): first tensor in the dot product, must be 1D. Its conjugate is used if it's complex.
    other (Tensor): second tensor in the dot product, must be 1D.
 
 Keyword args:
-    {out}
+""" + fr"""
+.. note:: {common_args["out"]}
+""" + r"""
 
 Example::
@@ -3455,7 +3468,7 @@ def merge_dicts(*dicts):
     tensor([16.+1.j])
     >>> torch.vdot(b, a)
     tensor([16.-1.j])
-""".format(**common_args))
+""")
 
 add_docstr(torch.eig,
            r"""

torch/linalg/__init__.py

Lines changed: 35 additions & 0 deletions
@@ -2251,3 +2251,38 @@
     >>> torch.dist(Q.mT @ Q, torch.eye(4))
     tensor(6.2158e-07)
 """)
+
+vecdot = _add_docstr(_linalg.linalg_vecdot, r"""
+linalg.vecdot(x, y, *, dim=-1, out=None) -> Tensor
+
+Computes the dot product of two batches of vectors along a dimension.
+
+In symbols, this function computes
+
+.. math::
+
+    \sum_{i=1}^n \overline{x_i}y_i.
+
+over the dimension :attr:`dim` where :math:`\overline{x_i}` denotes the conjugate for complex
+vectors, and it is the identity for real vectors.
+
+Supports input of half, bfloat16, float, double, cfloat, cdouble and integral dtypes.
+It also supports broadcasting.
+
+Args:
+    x (Tensor): first batch of vectors.
+    y (Tensor): second batch of vectors.
+
+Keyword args:
+    dim (int): Dimension along which to compute the dot product. Default: `-1`.
+    out (Tensor, optional): output tensor. Ignored if `None`. Default: `None`.
+
+Examples::
+
+    >>> v1 = torch.randn(3, 2)
+    >>> v2 = torch.randn(3, 2)
+    >>> linalg.vecdot(v1, v2)
+    tensor([ 0.3223, 0.2815, -0.1944])
+    >>> torch.vdot(v1[0], v2[0])
+    tensor(0.3223)
+""")

torch/overrides.py

Lines changed: 1 addition & 0 deletions

@@ -869,6 +869,7 @@ def get_testing_overrides() -> Dict[Callable, Callable]:
     torch.ravel: lambda input: -1,
     torch.real: lambda input, out=None: -1,
     torch.vdot: lambda input, other, out=None: -1,
+    torch.linalg.vecdot: lambda input, other, dim=-1, out=None: -1,
     torch.view_as_real: lambda input: -1,
     torch.view_as_complex: lambda input: -1,
     torch.reciprocal: lambda input, out=None: -1,

torch/testing/_internal/common_methods_invocations.py

Lines changed: 21 additions & 2 deletions
@@ -968,7 +968,7 @@ def sample_inputs_reduction(op_info, device, dtype, requires_grad, **kwargs):
     supports_multiple_dims: bool = kwargs.get('supports_multiple_dims', True)
 
     # TODO(@heitorschueroff) Once all reduction operators are using ReductionOpInfo
-    # use op_info.genearte_args_kwargs directly.
+    # use op_info.generate_args_kwargs directly.
     generate_args_kwargs = kwargs.get('generate_args_kwargs', lambda *args, **kwargs: (yield tuple(), {}))
 
     inputs: List[SampleInput] = []
@@ -1101,7 +1101,7 @@ class ReductionOpInfo(OpInfo):
     the optional keyword parameters of the ReductionOpInfo constructor.
 
     If a reduction operator does not yet implement the full required API of
-    reduction operators, this should be documented by skipping the failing
+    reduction operators, this should be documented by xfailing the failing
     tests rather than adding optional parameters to ReductionOpInfo.
 
     NOTE
@@ -2012,6 +2012,15 @@ def sample_inputs_isclose(
     yield SampleInput(lhs, args=(rhs,),
                       kwargs=dict(op_kwargs, rtol=rtol, atol=atol, equal_nan=equal_nan))
 
+def sample_inputs_linalg_vecdot(op_info, device, dtype, requires_grad, **kwargs):
+    yield from sample_inputs_binary_pwise(op_info, device, dtype, requires_grad)
+
+    # Add also samples with dim != -1
+    for s in sample_inputs_binary_pwise(op_info, device, dtype, requires_grad):
+        if s.input.ndim > 1:
+            s.kwargs["dim"] = 0
+            yield s
+
 def sample_inputs_t(op_info, device, dtype, requires_grad, **kwargs):
     make_arg = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
     return (SampleInput(make_arg((1, 2))),
@@ -9778,6 +9787,16 @@ def ref_pairwise_distance(input1, input2):
            gradcheck_wrapper=gradcheck_wrapper_hermitian_input,
            decorators=[skipCUDAIfNoMagmaAndNoCusolver, skipCUDAIfRocm, skipCPUIfNoLapack],
            ),
+    OpInfo('linalg.vecdot',
+           aten_name='linalg_vecdot',
+           ref=lambda x, y, *, dim=-1: (x.conj() * y).sum(dim),
+           dtypes=all_types_and_complex_and(torch.half, torch.bfloat16, torch.bool),
+           sample_inputs_func=sample_inputs_linalg_vecdot,
+           supports_forward_ad=True,
+           supports_fwgrad_bwgrad=True,
+           skips=(
+               # FIXME torch.sum(out=) has an incorrect behaviour
+               DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out'),),),
    OpInfo('linalg.cond',
           aten_name='linalg_cond',
           dtypes=floating_and_complex_types(),