diff --git a/.github/ci_commit_pins/xla.txt b/.github/ci_commit_pins/xla.txt
index 55b6b251981f81..285626421da2e0 100644
--- a/.github/ci_commit_pins/xla.txt
+++ b/.github/ci_commit_pins/xla.txt
@@ -1 +1 @@
-5444e06e5b851211af8a83e024c6703acfc095eb
+16d12d400cc3d0c42065c7291acbf03b8e696e89
diff --git a/aten/src/ATen/autocast_mode.cpp b/aten/src/ATen/autocast_mode.cpp
index feb4c0e0808490..bdb55a8c81f048 100644
--- a/aten/src/ATen/autocast_mode.cpp
+++ b/aten/src/ATen/autocast_mode.cpp
@@ -551,7 +551,6 @@ TORCH_LIBRARY_IMPL(aten, AutocastCPU, m) {
   KERNEL_CPU(grid_sampler_3d, fp32)
   KERNEL_CPU(trace, fp32)
   KERNEL_CPU(view_as_complex, fp32)
-  KERNEL_CPU(cholesky, fp32)
   KERNEL_CPU(cholesky_inverse, fp32)
   KERNEL_CPU(cholesky_solve, fp32)
   KERNEL_CPU(inverse, fp32)
diff --git a/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp b/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp
index 3d58c26024be5c..b19c7257bf68a4 100644
--- a/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp
+++ b/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp
@@ -573,7 +573,6 @@ pinv_batch_rule(
 }

 // These need to be outside. String constant must be declared outside of a macro to be used as template param
-LINALG_CHECK_MATRIX_UNARY_ONE_OUT(cholesky, cholesky);
 LINALG_CHECK_MATRIX_UNARY_ONE_OUT(cholesky_inverse, cholesky_inverse);
 LINALG_CHECK_MATRIX_UNARY_TWO_OUT(linalg_cholesky_ex, linalg.cholesky);
 LINALG_CHECK_MATRIX_UNARY_TWO_OUT(linalg_eig, linalg.eig);
diff --git a/aten/src/ATen/native/BatchLinearAlgebra.cpp b/aten/src/ATen/native/BatchLinearAlgebra.cpp
index 83613da6550240..7021ed75ade78e 100644
--- a/aten/src/ATen/native/BatchLinearAlgebra.cpp
+++ b/aten/src/ATen/native/BatchLinearAlgebra.cpp
@@ -37,10 +37,8 @@
 #include
 #include
 #include
-#include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -1687,62 +1685,6 @@ Tensor& cholesky_solve_out(const Tensor& self, const Tensor& A, bool upper, Tens

 DEFINE_DISPATCH(cholesky_stub);

-Tensor cholesky(const Tensor &self, bool upper) {
-  TORCH_WARN_ONCE(
-    "torch.cholesky is deprecated in favor of torch.linalg.cholesky and will be ",
-    "removed in a future PyTorch release.\n",
-    "L = torch.cholesky(A)\n",
-    "should be replaced with\n",
-    "L = torch.linalg.cholesky(A)\n",
-    "and\n"
-    "U = torch.cholesky(A, upper=True)\n",
-    "should be replaced with\n",
-    "U = torch.linalg.cholesky(A).mH().\n"
-    "This transform will produce equivalent results for all valid (symmetric positive definite) inputs."
-  );
-  if (self.numel() == 0) {
-    return at::empty_like(self, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
-  }
-  squareCheckInputs(self, "cholesky");
-
-  auto raw_cholesky_output = cloneBatchedColumnMajor(self);
-  auto info_shape = IntArrayRef(
-      self.sizes().cbegin(), self.sizes().cend() - 2); // self.shape[:-2]
-  auto info = at::empty({info_shape}, self.options().dtype(kInt));
-
-  // fill the raw_cholesky_output with the result
-  cholesky_stub(self.device().type(), raw_cholesky_output, info, upper);
-
-  at::_linalg_check_errors(info, "cholesky", self.dim() == 2);
-
-  if (upper) {
-    return raw_cholesky_output.triu_();
-  } else {
-    return raw_cholesky_output.tril_();
-  }
-}
-
-Tensor& cholesky_out(const Tensor &self, bool upper, Tensor &result) {
-  TORCH_WARN_ONCE(
-    "torch.cholesky is deprecated in favor of torch.linalg.cholesky and will be ",
-    "removed in a future PyTorch release.\n",
-    "L = torch.cholesky(A)\n",
-    "should be replaced with\n",
-    "L = torch.linalg.cholesky(A)\n",
-    "and\n"
-    "U = torch.cholesky(A, upper=True)\n",
-    "should be replaced with\n",
-    "U = torch.linalg.cholesky(A).mH().\n"
-    "This transform will produce equivalent results for all valid (symmetric positive definite) inputs."
-  );
-  checkSameDevice("cholesky", result, self);
-  checkLinalgCompatibleDtype("cholesky", result, self);
-  Tensor result_tmp = at::cholesky(self, upper);
-  at::native::resize_output(result, result_tmp.sizes());
-  result.copy_(result_tmp);
-  return result;
-}
-
 TORCH_IMPL_FUNC(linalg_cholesky_ex_out)(const Tensor& A,
                                         bool upper,
                                         bool check_errors,
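The removed `TORCH_WARN_ONCE` message above already spells out the migration path. As a minimal sketch of that replacement in user code (illustrative only, not part of the patch; assumes a real symmetric positive-definite input):

```python
import torch

# Build a random symmetric positive-definite matrix for illustration.
a = torch.randn(3, 3, dtype=torch.float64)
A = a @ a.mT + 1e-3 * torch.eye(3, dtype=torch.float64)

# Previously: L = torch.cholesky(A)
L = torch.linalg.cholesky(A)      # lower-triangular factor, A = L @ L.mH

# Previously: U = torch.cholesky(A, upper=True)
U = torch.linalg.cholesky(A).mH   # or torch.linalg.cholesky(A, upper=True)

# Both factors rebuild A for valid (Hermitian positive-definite) inputs.
assert torch.allclose(L @ L.mH, A)
assert torch.allclose(U.mH @ U, A)
```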
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
index 893715750e53e7..d57e8d35de2ab5 100644
--- a/aten/src/ATen/native/native_functions.yaml
+++ b/aten/src/ATen/native/native_functions.yaml
@@ -8798,15 +8798,6 @@
   device_guard: False
   tags: inplace_view

-- func: cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
-  dispatch:
-    CPU, CUDA: cholesky_out
-
-- func: cholesky(Tensor self, bool upper=False) -> Tensor
-  variants: method, function
-  dispatch:
-    CPU, CUDA: cholesky
-
 - func: cholesky_solve.out(Tensor self, Tensor input2, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeExplicitAutograd: cholesky_solve_out
diff --git a/docs/source/tensors.rst b/docs/source/tensors.rst
index 4f6de6f62d53e7..809288e1819516 100644
--- a/docs/source/tensors.rst
+++ b/docs/source/tensors.rst
@@ -276,7 +276,6 @@ Tensor class reference
     Tensor.ceil
     Tensor.ceil_
     Tensor.char
-    Tensor.cholesky
     Tensor.cholesky_inverse
     Tensor.cholesky_solve
     Tensor.chunk
diff --git a/docs/source/torch.rst b/docs/source/torch.rst
index 22a7b9acc0c0d0..2b39779a37d140 100644
--- a/docs/source/torch.rst
+++ b/docs/source/torch.rst
@@ -562,7 +562,6 @@ BLAS and LAPACK Operations
     baddbmm
     bmm
     chain_matmul
-    cholesky
     cholesky_inverse
     cholesky_solve
     dot
diff --git a/test/cpp/lazy/test_lazy_ops.cpp b/test/cpp/lazy/test_lazy_ops.cpp
index aa31ffc59bb51a..fc0ed83fd08ecb 100644
--- a/test/cpp/lazy/test_lazy_ops.cpp
+++ b/test/cpp/lazy/test_lazy_ops.cpp
@@ -1028,27 +1028,6 @@ TEST_F(LazyOpsTest, TestQR) {
   }
 }

-TEST_F(LazyOpsTest, TestCholesky) {
-  static const int dims[] = {4, 7};
-  for (auto m : dims) {
-    for (bool upper : {true, false}) {
-      torch::Tensor a = torch::rand(
-          {3, m, m},
-          torch::TensorOptions(torch::kFloat).device(DefaultDevice()));
-      torch::Tensor pd_a =
-          torch::matmul(a, torch::transpose(a, 1, 2)) +
-          torch::eye(
-              m, torch::TensorOptions(torch::kFloat).device(DefaultDevice()));
-      auto b = torch::cholesky(pd_a, upper);
-      ForEachDevice([&](const torch::Device& device) {
-        torch::Tensor lazy_a = CopyToDevice(pd_a, device);
-        auto lazy_b = torch::cholesky(lazy_a, upper);
-        AllClose(b, lazy_b, /*rtol=*/1e-3, /*atol=*/1e-4);
-      });
-    }
-  }
-}
-
 TEST_F(LazyOpsTest, TestLogDet) {
   static const int dims[] = {4, 7};
   for (auto m : dims) {
diff --git a/test/distributed/_tensor/test_dtensor_ops.py b/test/distributed/_tensor/test_dtensor_ops.py
index d172d9378de0c3..73258094c50459 100644
--- a/test/distributed/_tensor/test_dtensor_ops.py
+++ b/test/distributed/_tensor/test_dtensor_ops.py
@@ -119,7 +119,6 @@ def wrapped(fn):
     xfail("cauchy"),
     xfail("cartesian_prod"),
     xfail("cdist"),
-    xfail("cholesky"),
     xfail("cholesky_inverse"),
     xfail("cholesky_solve"),
     xfail("chunk"),
diff --git a/test/expect/HasDecompTest.test_has_decomposition.expect b/test/expect/HasDecompTest.test_has_decomposition.expect
index 989b9fb64d2f73..60586bbffb5654 100644
--- a/test/expect/HasDecompTest.test_has_decomposition.expect
+++ b/test/expect/HasDecompTest.test_has_decomposition.expect
@@ -642,8 +642,6 @@ aten::ccol_indices_copy
 aten::ccol_indices_copy.out
 aten::channel_shuffle
 aten::channel_shuffle.out
-aten::cholesky
-aten::cholesky.out
 aten::cholesky_inverse
 aten::cholesky_inverse.out
 aten::cholesky_solve
diff --git a/test/forward_backward_compatibility/check_forward_backward_compatibility.py b/test/forward_backward_compatibility/check_forward_backward_compatibility.py
index 91cf8b79bf4180..8d146a5f6807e3 100644
--- a/test/forward_backward_compatibility/check_forward_backward_compatibility.py
+++ b/test/forward_backward_compatibility/check_forward_backward_compatibility.py
@@ -51,6 +51,8 @@
     # Internal, profiler-specific ops
     ("profiler::_call_end_callbacks_on_jit_fut*", datetime.date(9999, 1, 1)),
    ("profiler::_record_function_enter", datetime.date(9999, 1, 1)),
+    ("aten::cholesky", datetime.date(9999, 1, 1)),
+    ("aten::cholesky.out", datetime.date(9999, 1, 1)),
    ("aten::_sparse_addmm", datetime.date(2022, 6, 30)),
    ("aten::kl_div_backward", datetime.date(2022, 9, 1)),
    ("aten::_cholesky_helper", datetime.date(9999, 1, 1)),
diff --git a/test/functorch/test_aotdispatch.py b/test/functorch/test_aotdispatch.py
index fcc821c9bd7e2d..cf484d17da78e6 100644
--- a/test/functorch/test_aotdispatch.py
+++ b/test/functorch/test_aotdispatch.py
@@ -2471,7 +2471,6 @@ def forward(self, x):
     skip('as_strided', 'partial_views'),  # flaky

     # Too annoying to generate random inputs
-    xfail('cholesky'),
     xfail('linalg.cholesky'),

     # Given input size: (s0xs1x2). Calculated output size: ...
diff --git a/test/functorch/test_ops.py b/test/functorch/test_ops.py
index e05f953cf54463..2f0e8e773542ea 100644
--- a/test/functorch/test_ops.py
+++ b/test/functorch/test_ops.py
@@ -1361,7 +1361,6 @@ def get_vjp(cotangents, *primals):
         # this is not supported. Tensor is of size [5, 2, 3] while the given forward gradient is of size [1, 2, 3].
         xfail('normal', ''),
         xfail('cdist', ''),  # NYI: forward-AD for _cdist_forward
-        xfail('cholesky', ''),  # NYI: forward-AD for cholesky
         xfail('logcumsumexp', ''),  # NYI: forward-AD for logcumsumexp
         xfail('nn.functional.embedding_bag', ''),  # NYI: forward-AD for _embedding_bag
         xfail('nn.functional.grid_sample', ''),  # NYI: forward AD for grid_sampler_2d
@@ -1482,7 +1481,6 @@ def reference(primals, cotangents, primals_tangents, cotangents_tangents):
         xfail('cdouble'),  # required rank 4 tensor to use channels_last format
         xfail('cfloat'),  # required rank 4 tensor to use channels_last format
         xfail('chalf'),  # required rank 4 tensor to use channels_last format
-        xfail('cholesky'),  # Forward AD not implemented and no decomposition
         xfail('ormqr'),  # Forward AD not implemented and no decomposition
         xfail('double'),  # required rank 4 tensor to use channels_last format
         xfail('float'),  # required rank 4 tensor to use channels_last format
diff --git a/test/inductor/test_torchinductor_opinfo.py b/test/inductor/test_torchinductor_opinfo.py
index ccdea7cc3f1b48..c39aacdc206cbb 100644
--- a/test/inductor/test_torchinductor_opinfo.py
+++ b/test/inductor/test_torchinductor_opinfo.py
@@ -160,7 +160,6 @@ def process(device_type):
     "bernoulli": {f32, f64},
     "bincount": {i32, i64},
     "bucketize": {b8, f16, f32, f64, i32, i64},
-    "cholesky": {f32, f64},
     "combinations": {b8, f16, f32, f64, i32, i64},
     "corrcoef": {f32, f64, i32, i64},
     "cov": {f32, f64, i32, i64},
@@ -260,7 +259,6 @@ def process(device_type):
     "bernoulli": {f16, f32, f64},
     "bincount": {i32, i64},
     "bucketize": {b8, f16, f32, f64, i32, i64},
-    "cholesky": {f32, f64},
     "combinations": {b8, f16, f32, f64, i32, i64},
     "corrcoef": {f16, f32, f64, i32, i64},
     "cov": {f16, f32, f64, i32, i64},
diff --git a/test/test_linalg.py b/test/test_linalg.py
index f45551e8d6a3cb..b8ac3b549097ed 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -141,6 +141,13 @@ def run_test_case(a, b):
         run_test_case(zero_strided, b)
         run_test_case(a, zero_strided)

+    def test_cholesky_removed_error(self, device):
+        a = make_tensor(5, 5, device=device, dtype=torch.float32)
+        with self.assertRaisesRegex(RuntimeError, "This function was deprecated since version 1.9 and is now removed"):
+            torch.cholesky(a)
+        with self.assertRaisesRegex(RuntimeError, "This function was deprecated since version 1.9 and is now removed"):
+            a.cholesky()
+
     def test_matrix_rank_removed_error(self, device):
         a = make_tensor(5, 5, device=device, dtype=torch.float32)
         with self.assertRaisesRegex(RuntimeError, "This function was deprecated since version 1.9 and is now removed"):
@@ -568,103 +575,6 @@ def test_cholesky_errors_and_warnings(self, device, dtype):
         with self.assertRaisesRegex(RuntimeError, "Expected all tensors to be on the same device"):
             torch.linalg.cholesky(A, out=out)

-    # NOTE: old_cholesky* tests were moved here from test_torch.py and test_autograd.py
-    @slowTest
-    @skipCUDAIfNoMagma
-    @skipCPUIfNoLapack
-    @dtypes(torch.double)
-    def test_old_cholesky_batched_many_batches(self, device, dtype):
-        from torch.testing._internal.common_utils import random_symmetric_pd_matrix
-
-        def cholesky_test_helper(n, batchsize, device, upper):
-            A = random_symmetric_pd_matrix(n, batchsize, dtype=dtype, device=device)
-            chol_fact = torch.cholesky(A, upper=upper)
-            if upper:
-                # Correctness check
-                self.assertEqual(A, chol_fact.mT.matmul(chol_fact))
-                # Upper triangular check
-                self.assertEqual(chol_fact, chol_fact.triu())
-            else:
-                # Correctness check
-                self.assertEqual(A, chol_fact.matmul(chol_fact.mT))
-                # Lower triangular check
-                self.assertEqual(chol_fact, chol_fact.tril())
-
-        for upper, batchsize in itertools.product([True, False], [262144, 524288]):
-            cholesky_test_helper(2, batchsize, device, upper)
-
-    @precisionOverride({torch.float32: 1e-4, torch.complex64: 1e-4})
-    @skipCUDAIfNoMagma
-    @skipCPUIfNoLapack
-    @dtypes(*floating_and_complex_types())
-    def test_old_cholesky_batched(self, device, dtype):
-        from torch.testing._internal.common_utils import random_hermitian_pd_matrix
-
-        def cholesky_test_helper(n, batch_dims, upper):
-            A = random_hermitian_pd_matrix(n, *batch_dims, dtype=dtype, device=device)
-            cholesky_exp = torch.stack([m.cholesky(upper=upper) for m in A.reshape(-1, n, n)])
-            cholesky_exp = cholesky_exp.reshape_as(A)
-            self.assertEqual(cholesky_exp, torch.cholesky(A, upper=upper))
-
-        for upper, batchsize in itertools.product([True, False], [(3,), (3, 4), (2, 3, 4)]):
-            cholesky_test_helper(3, batchsize, upper)
-
-    @precisionOverride({torch.float32: 1e-4, torch.complex64: 1e-4})
-    @skipCUDAIfNoMagma
-    @skipCPUIfNoLapack
-    @dtypes(*floating_and_complex_types())
-    @tf32_on_and_off(0.01)
-    def test_old_cholesky(self, device, dtype):
-        from torch.testing._internal.common_utils import random_hermitian_pd_matrix
-
-        A = random_hermitian_pd_matrix(10, dtype=dtype, device=device)
-
-        # default Case
-        C = torch.cholesky(A)
-        B = torch.mm(C, C.t().conj())
-        self.assertEqual(A, B, atol=1e-14, rtol=0)
-
-        # test Upper Triangular
-        U = torch.cholesky(A, True)
-        B = torch.mm(U.t().conj(), U)
-        self.assertEqual(A, B, atol=1e-14, rtol=0, msg='cholesky (upper) did not allow rebuilding the original matrix')
-
-        # test Lower Triangular
-        L = torch.cholesky(A, False)
-        B = torch.mm(L, L.t().conj())
-        self.assertEqual(A, B, atol=1e-14, rtol=0, msg='cholesky (lower) did not allow rebuilding the original matrix')
-
-    @skipCUDAIfNoMagma
-    @skipCPUIfNoLapack
-    @dtypes(*floating_and_complex_types())
-    def test_old_cholesky_empty(self, device, dtype):
-        def run_test(upper):
-            A = torch.empty(0, 0, dtype=dtype, device=device)
-            chol = torch.cholesky(A, upper)
-            chol_A = torch.matmul(chol, chol.t().conj())
-            self.assertEqual(A, chol_A)
-        for upper in [True, False]:
-            run_test(upper)
-
-    # Test for issue
-    # https://github.com/pytorch/pytorch/issues/57032
-    # torch.cholesky with upper=True for batched CUDA inputs was wrong
-    # it was using the lower triangular part instead of the upper one
-    @onlyCUDA
-    @skipCUDAIfNoMagma
-    @dtypes(*floating_and_complex_types())
-    def test_old_cholesky_batched_upper(self, device, dtype):
-        from torch.testing._internal.common_utils import random_hermitian_pd_matrix
-
-        batchsize = 2
-        A = random_hermitian_pd_matrix(3, batchsize, dtype=dtype, device=device)
-        A_triu = A.triu()  # fill the lower triangular part with zero
-
-        U = torch.cholesky(A_triu, upper=True)
-
-        reconstruct_A = U.mH @ U
-        self.assertEqual(A, reconstruct_A)
-
     @skipCUDAIfNoMagmaAndNoCusolver
     @skipCPUIfNoLapack
     @dtypes(*floating_and_complex_types())
@@ -2501,7 +2411,7 @@ def cholesky_solve_test_helper(self, A_dims, b_dims, upper, device, dtype):

         b = torch.randn(*b_dims, dtype=dtype, device=device)
         A = random_hermitian_pd_matrix(*A_dims, dtype=dtype, device=device)
-        L = torch.cholesky(A, upper=upper)
+        L = torch.linalg.cholesky(A, upper=upper)
         return b, A, L

     @skipCUDAIfNoMagma
diff --git a/test/test_meta.py b/test/test_meta.py
index 848de1cf699122..72f4008763a55d 100644
--- a/test/test_meta.py
+++ b/test/test_meta.py
@@ -633,7 +633,6 @@ def run_meta_crossref(
     torch.polar : {f64, f32},
     torch._segment_reduce : {f64, f16, bf16, f32},
     torch.searchsorted : {f64, i32, i64, f16, u8, i16, bf16, i8, f32},
-    torch.cholesky : {f64, f32, c128, c64},
     torch.cholesky_inverse : {f64, f32, c128, c64},
     torch.cholesky_solve : {f64, f32, c128, c64},
     torch.linalg.eig : {f64, f32, c128, c64},
@@ -822,8 +821,6 @@ def __torch_function__(self, func, types, args=(), kwargs=None):
 # these always fail
 meta_dispatch_expected_failures = {
     aten.allclose.default: {f16, bf16, f32, f64, c64, c128},  # NotImplementedError: 'aten::_local_scalar_dense'
-    aten.cholesky.default : {c64, c128, f64, f32},
-    aten.cholesky.out : {c64, c128, f64, f32},
     aten.cholesky_inverse.default : {c64, c128, f64, f32},
     aten.cholesky_inverse.out : {c64, c128, f64, f32},
     aten.cholesky_solve.default : {c64, c128, f64, f32},
diff --git a/test/test_ops.py b/test/test_ops.py
index 06a52f04586533..14665e07681617 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -1882,7 +1882,6 @@ def test_refs_are_in_decomp_table(self, op):

 fake_skips = (
     "aminmax",  # failing input
-    "cholesky",  # Could not run 'aten::cholesky' with arguments from the 'Meta' backend
     "cholesky_inverse",  # Could not run 'aten::cholesky' with arguments from the 'Meta' backend
     "cov",  # aweights cannot be negtaive
     "istft",  # window overlap add min: 0
@@ -1987,7 +1986,6 @@ def test_refs_are_in_decomp_table(self, op):
     "roll",
     "svd_lowrank",
     "sgn",
-    "cholesky",
 }

 fake_backward_xfails = {xfail(stride_skip) for stride_skip in fake_backward_xfails} | {
diff --git a/test/test_proxy_tensor.py b/test/test_proxy_tensor.py
index 753183ef200d4f..79b3cef43378b1 100644
--- a/test/test_proxy_tensor.py
+++ b/test/test_proxy_tensor.py
@@ -1343,7 +1343,6 @@ def f(a, b, c, d, e):
     # FakeTensor fallback doesn't work
     xfail('_segment_reduce', 'lengths'),
     xfail('multinomial'),
-    xfail('cholesky'),
     xfail('cholesky_inverse'),
     # cannot do these as they rely on tensor data
     xfail('repeat_interleave'),
diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml
index 7feabf5b45eb0e..cf40c3461f16de 100644
--- a/tools/autograd/derivatives.yaml
+++ b/tools/autograd/derivatives.yaml
@@ -371,9 +371,6 @@
   self: zeros_like(grad)
   result: auto_element_wise

-- name: cholesky(Tensor self, bool upper=False) -> Tensor
-  self: cholesky_backward(grad, upper, result)
-
 - name: linalg_cholesky_ex(Tensor self, *, bool upper=False, bool check_errors=False) -> (Tensor L, Tensor info)
   self: cholesky_backward(grad, upper, L)
   L: cholesky_jvp(self_t, L, upper)
diff --git a/tools/autograd/gen_variable_type.py b/tools/autograd/gen_variable_type.py
index 7dc8dc469e813e..80ddbb5904263e 100644
--- a/tools/autograd/gen_variable_type.py
+++ b/tools/autograd/gen_variable_type.py
@@ -235,7 +235,6 @@
     "split_with_sizes_backward",
     "dot",
     "vdot",
-    "cholesky",
     "triangular_solve",
     "mm",
     "_unsafe_view",
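The `derivatives.yaml` hunk above drops the derivative entry for the removed overload but keeps `cholesky_backward`/`cholesky_jvp` attached to `linalg_cholesky_ex`, so gradients still flow through `torch.linalg.cholesky`. A minimal sketch of that, assuming a double-precision symmetric positive-definite input (illustrative only, not part of the patch):

```python
import torch

a = torch.randn(4, 4, dtype=torch.float64)
A = (a @ a.mT + 1e-3 * torch.eye(4, dtype=torch.float64)).requires_grad_()

# Backward (and forward-mode) differentiation comes from the retained
# linalg_cholesky_ex formulas: cholesky_backward / cholesky_jvp.
L = torch.linalg.cholesky(A)
L.sum().backward()
assert A.grad is not None and A.grad.shape == A.shape
```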
diff --git a/torch/__init__.py b/torch/__init__.py
index 28320858955ed3..96ce0ce3f48707 100644
--- a/torch/__init__.py
+++ b/torch/__init__.py
@@ -1466,6 +1466,7 @@ def compiled_with_cxx11_abi():

 # Import removed ops with error message about removal
 from ._linalg_utils import (  # type: ignore[misc]
+    cholesky,
     matrix_rank,
     eig,
     solve,
diff --git a/torch/_linalg_utils.py b/torch/_linalg_utils.py
index 3a81fc6c27adce..f2163c1d053166 100644
--- a/torch/_linalg_utils.py
+++ b/torch/_linalg_utils.py
@@ -93,6 +93,13 @@ def symeig(A: Tensor, largest: Optional[bool] = False) -> Tuple[Tensor, Tensor]:

 # These functions were deprecated and removed
 # This nice error message can be removed in version 1.13+
+def cholesky(input: Tensor, upper: bool = False, *, out=None) -> Tensor:
+    raise RuntimeError(
+        "This function was deprecated since version 1.9 and is now removed. "
+        + "Please use the `torch.linalg.cholesky` function instead.",
+    )
+
+
 def matrix_rank(input, tol=None, symmetric=False, *, out=None) -> Tensor:
     raise RuntimeError(
         "This function was deprecated since version 1.9 and is now removed.",
diff --git a/torch/_tensor.py b/torch/_tensor.py
index 3240effa8037fc..de3058c31980de 100644
--- a/torch/_tensor.py
+++ b/torch/_tensor.py
@@ -647,6 +647,11 @@ def norm(
         )
         return torch.norm(self, p, dim, keepdim, dtype=dtype)

+    def cholesky(self, upper=False):
+        from ._linalg_utils import cholesky
+
+        return cholesky(self, upper=upper)
+
     def solve(self, other):
         from ._linalg_utils import solve

diff --git a/torch/_tensor_docs.py b/torch/_tensor_docs.py
index 7960314fde54ed..8cdf81ff2927af 100644
--- a/torch/_tensor_docs.py
+++ b/torch/_tensor_docs.py
@@ -1107,15 +1107,6 @@ def add_docstr_all(method, docstr):
 """,
 )

-add_docstr_all(
-    "cholesky",
-    r"""
-cholesky(upper=False) -> Tensor
-
-See :func:`torch.cholesky`
-""",
-)
-
 add_docstr_all(
     "cholesky_solve",
     r"""
diff --git a/torch/_torch_docs.py b/torch/_torch_docs.py
index c6fe93ef9b7844..0625c07a306443 100644
--- a/torch/_torch_docs.py
+++ b/torch/_torch_docs.py
@@ -2539,88 +2539,6 @@ def merge_dicts(*dicts):
     ),
 )

-add_docstr(
-    torch.cholesky,
-    r"""
-cholesky(input, upper=False, *, out=None) -> Tensor
-
-Computes the Cholesky decomposition of a symmetric positive-definite
-matrix :math:`A` or for batches of symmetric positive-definite matrices.
-
-If :attr:`upper` is ``True``, the returned matrix ``U`` is upper-triangular, and
-the decomposition has the form:
-
-.. math::
-
-    A = U^TU
-
-If :attr:`upper` is ``False``, the returned matrix ``L`` is lower-triangular, and
-the decomposition has the form:
-
-.. math::
-
-    A = LL^T
-
-If :attr:`upper` is ``True``, and :math:`A` is a batch of symmetric positive-definite
-matrices, then the returned tensor will be composed of upper-triangular Cholesky factors
-of each of the individual matrices. Similarly, when :attr:`upper` is ``False``, the returned
-tensor will be composed of lower-triangular Cholesky factors of each of the individual
-matrices.
-
-.. warning::
-
-    :func:`torch.cholesky` is deprecated in favor of :func:`torch.linalg.cholesky`
-    and will be removed in a future PyTorch release.
-
-    ``L = torch.cholesky(A)`` should be replaced with
-
-    .. code:: python
-
-        L = torch.linalg.cholesky(A)
-
-    ``U = torch.cholesky(A, upper=True)`` should be replaced with
-
-    .. code:: python
-
-        U = torch.linalg.cholesky(A).mH
-
-    This transform will produce equivalent results for all valid (symmetric positive definite) inputs.
-
-Args:
-    input (Tensor): the input tensor :math:`A` of size :math:`(*, n, n)` where `*` is zero or more
-                batch dimensions consisting of symmetric positive-definite matrices.
-    upper (bool, optional): flag that indicates whether to return a
-                            upper or lower triangular matrix. Default: ``False``
-
-Keyword args:
-    out (Tensor, optional): the output matrix
-
-Example::
-
-    >>> a = torch.randn(3, 3)
-    >>> a = a @ a.mT + 1e-3 # make symmetric positive-definite
-    >>> l = torch.cholesky(a)
-    >>> a
-    tensor([[ 2.4112, -0.7486,  1.4551],
-            [-0.7486,  1.3544,  0.1294],
-            [ 1.4551,  0.1294,  1.6724]])
-    >>> l
-    tensor([[ 1.5528,  0.0000,  0.0000],
-            [-0.4821,  1.0592,  0.0000],
-            [ 0.9371,  0.5487,  0.7023]])
-    >>> l @ l.mT
-    tensor([[ 2.4112, -0.7486,  1.4551],
-            [-0.7486,  1.3544,  0.1294],
-            [ 1.4551,  0.1294,  1.6724]])
-    >>> a = torch.randn(3, 2, 2) # Example for batched input
-    >>> a = a @ a.mT + 1e-03 # make symmetric positive-definite
-    >>> l = torch.cholesky(a)
-    >>> z = l @ l.mT
-    >>> torch.dist(z, a)
-    tensor(2.3842e-07)
-""",
-)
-
 add_docstr(
     torch.cholesky_solve,
     r"""
diff --git a/torch/overrides.py b/torch/overrides.py
index 844ebe263612fd..06945272203774 100644
--- a/torch/overrides.py
+++ b/torch/overrides.py
@@ -265,6 +265,7 @@ def get_ignored_functions() -> Set[Callable]:
     Tensor.__class__,
     Tensor.__subclasshook__,
     Tensor.__hash__,
+    Tensor.cholesky,
     Tensor.as_subclass,
     Tensor.eig,
     Tensor.lstsq,
@@ -436,7 +437,6 @@ def get_testing_overrides() -> Dict[Callable, Callable]:
     torch.celu: lambda input, alpha=1., inplace=False: -1,
     torch.chain_matmul: lambda *matrices, out=None: -1,
     torch.channel_shuffle: lambda input, groups : -1,
-    torch.cholesky: lambda input, upper=False, out=None: -1,
     torch.linalg.cholesky: lambda input, out=None: -1,
     torch.linalg.cholesky_ex: lambda input, check_errors=False, out=None: -1,
     torch.cholesky_inverse: lambda input, upper=False, out=None: -1,
diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py
index df00d4a29a7752..2eb7a84502e761 100644
--- a/torch/testing/_internal/common_methods_invocations.py
+++ b/torch/testing/_internal/common_methods_invocations.py
@@ -112,7 +112,6 @@
 )
 from torch.testing._internal import opinfo
 from torch.testing._internal.opinfo.definitions.linalg import (
-    sample_inputs_linalg_cholesky,
     sample_inputs_linalg_cholesky_inverse,
     sample_inputs_cross,
     sample_inputs_linalg_qr_geqrf,
@@ -9806,11 +9805,6 @@ def reference_flatten(input, start_dim=0, end_dim=-1):
            supports_sparse_bsr=True,
            supports_sparse_bsc=True,
            assert_autodiffed=True),
-    OpInfo('cholesky',
-           dtypes=floating_and_complex_types(),
-           sample_inputs_func=sample_inputs_linalg_cholesky,
-           gradcheck_wrapper=gradcheck_wrapper_hermitian_input,
-           decorators=[skipCUDAIfNoMagma, skipCPUIfNoLapack],),
     OpInfo('cholesky_inverse',
            dtypes=floating_and_complex_types(),
            backward_dtypes=floating_and_complex_types(),
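With the Python-level stub wired up above (`torch/_linalg_utils.py`, `torch/_tensor.py`, `torch/__init__.py`), `torch.cholesky` and `Tensor.cholesky` now raise instead of computing, exactly as the new `test_cholesky_removed_error` checks. A minimal sketch of what downstream callers should expect (illustrative only, not part of the patch):

```python
import torch

A = torch.eye(3)

try:
    torch.cholesky(A)      # Tensor.cholesky() now routes to the same stub
except RuntimeError as err:
    # The stub's message points at the supported replacement.
    assert "torch.linalg.cholesky" in str(err)
    L = torch.linalg.cholesky(A)
```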