diff --git a/astropy/stats/bayesian_blocks.py b/astropy/stats/bayesian_blocks.py index b08c68a5d870..4f688ed16fbe 100644 --- a/astropy/stats/bayesian_blocks.py +++ b/astropy/stats/bayesian_blocks.py @@ -45,19 +45,36 @@ https://www.tandfonline.com/doi/abs/10.1080/01621459.1969.10501038 """ +from __future__ import annotations + import warnings from inspect import signature +from typing import TYPE_CHECKING import numpy as np from astropy.utils.exceptions import AstropyUserWarning +if TYPE_CHECKING: + from collections.abc import KeysView + from typing import Literal + + from numpy.typing import ArrayLike, NDArray + +# TODO: typing: use a custom-defined 'ArrayLike-but-not-a-scalar' type for `float | ArrayLike` or `ArrayLike | float` hints + # TODO: implement other fitness functions from appendix C of Scargle 2013 __all__ = ["FitnessFunc", "Events", "RegularEvents", "PointMeasures", "bayesian_blocks"] -def bayesian_blocks(t, x=None, sigma=None, fitness="events", **kwargs): +def bayesian_blocks( + t: ArrayLike, + x: ArrayLike | None = None, + sigma: ArrayLike | float | None = None, + fitness: Literal["events", "regular_events", "measures"] | FitnessFunc = "events", + **kwargs, +) -> NDArray[float]: r"""Compute optimal segmentation of data with Scargle's Bayesian Blocks. This is a flexible implementation of the Bayesian Blocks algorithm @@ -210,12 +227,22 @@ class FitnessFunc: https://ui.adsabs.harvard.edu/abs/2013ApJ...764..167S """ - def __init__(self, p0=0.05, gamma=None, ncp_prior=None): + def __init__( + self, + p0: float = 0.05, + gamma: float | None = None, + ncp_prior: float | None = None, + ) -> None: self.p0 = p0 self.gamma = gamma self.ncp_prior = ncp_prior - def validate_input(self, t, x=None, sigma=None): + def validate_input( + self, + t: ArrayLike, + x: ArrayLike | None = None, + sigma: float | ArrayLike | None = None, + ) -> tuple[NDArray[float], NDArray[float], NDArray[float]]: """Validate inputs to the model. 
Parameters @@ -229,7 +256,7 @@ def validate_input(self, t, x=None, sigma=None): Returns ------- - t, x, sigma : array-like, float or None + t, x, sigma : array-like, float validated and perhaps modified versions of inputs """ # validate array input @@ -246,7 +273,7 @@ def validate_input(self, t, x=None, sigma=None): if sigma is not None: raise ValueError("If sigma is specified, x must be specified") else: - sigma = 1 + sigma = 1.0 if len(unq_t) == len(t): x = np.ones_like(t) @@ -273,7 +300,7 @@ def validate_input(self, t, x=None, sigma=None): # verify the given sigma value if sigma is None: - sigma = 1 + sigma = 1.0 else: sigma = np.asarray(sigma, dtype=float) if sigma.shape not in [(), (1,), (t.size,)]: @@ -284,7 +311,7 @@ def validate_input(self, t, x=None, sigma=None): def fitness(self, **kwargs): raise NotImplementedError() - def p0_prior(self, N): + def p0_prior(self, N: int) -> float: """Empirical prior, parametrized by the false alarm probability ``p0``. See eq. 21 in Scargle (2013). @@ -298,10 +325,10 @@ def p0_prior(self, N): # the fitness_args property will return the list of arguments accepted by # the method fitness(). This allows more efficient computation below. @property - def _fitness_args(self): + def _fitness_args(self) -> KeysView[str]: return signature(self.fitness).parameters.keys() - def compute_ncp_prior(self, N): + def compute_ncp_prior(self, N: int) -> float: """ If ``ncp_prior`` is not explicitly defined, compute it from ``gamma`` or ``p0``. @@ -316,7 +343,12 @@ def compute_ncp_prior(self, N): "``gamma`` nor ``p0`` is defined." ) - def fit(self, t, x=None, sigma=None): + def fit( + self, + t: ArrayLike, + x: ArrayLike | None = None, + sigma: ArrayLike | float | None = None, + ) -> NDArray[float]: """Fit the Bayesian Blocks model given the specified fitness function. Parameters @@ -439,11 +471,16 @@ class Events(FitnessFunc): If ``ncp_prior`` is specified, ``gamma`` and ``p0`` is ignored. 
""" - def fitness(self, N_k, T_k): + def fitness(self, N_k: NDArray[float], T_k: NDArray[float]) -> NDArray[float]: # eq. 19 from Scargle 2013 return N_k * (np.log(N_k / T_k)) - def validate_input(self, t, x, sigma): + def validate_input( + self, + t: ArrayLike, + x: ArrayLike | None, + sigma: float | ArrayLike | None, + ) -> tuple[NDArray[float], NDArray[float], NDArray[float]]: t, x, sigma = super().validate_input(t, x, sigma) if x is not None and np.any(x % 1 > 0): raise ValueError("x must be integer counts for fitness='events'") @@ -465,6 +502,10 @@ class RegularEvents(FitnessFunc): False alarm probability, used to compute the prior on :math:`N_{\rm blocks}` (see eq. 21 of Scargle 2013). If gamma is specified, p0 is ignored. + gamma : float, optional + If specified, then use this gamma to compute the general prior form, + :math:`p \sim {\tt gamma}^{N_{\rm blocks}}`. If gamma is specified, p0 + is ignored. ncp_prior : float, optional If specified, use the value of ``ncp_prior`` to compute the prior as above, using the definition :math:`{\tt ncp\_prior} = -\ln({\tt @@ -472,17 +513,28 @@ class RegularEvents(FitnessFunc): ignored. """ - def __init__(self, dt, p0=0.05, gamma=None, ncp_prior=None): + def __init__( + self, + dt: float, + p0: float | None = 0.05, + gamma: float | None = None, + ncp_prior: float | None = None, + ) -> None: self.dt = dt super().__init__(p0, gamma, ncp_prior) - def validate_input(self, t, x, sigma): + def validate_input( + self, + t: ArrayLike, + x: ArrayLike | None = None, + sigma: float | ArrayLike | None = None, + ) -> tuple[NDArray[float], NDArray[float], NDArray[float]]: t, x, sigma = super().validate_input(t, x, sigma) if not np.all((x == 0) | (x == 1)): raise ValueError("Regular events must have only 0 and 1 in x") return t, x, sigma - def fitness(self, T_k, N_k): + def fitness(self, T_k: NDArray[float], N_k: NDArray[float]) -> NDArray[float]: # Eq. 
C23 of Scargle 2013 M_k = T_k / self.dt N_over_M = N_k / M_k @@ -510,6 +562,10 @@ class PointMeasures(FitnessFunc): False alarm probability, used to compute the prior on :math:`N_{\rm blocks}` (see eq. 21 of Scargle 2013). If gamma is specified, p0 is ignored. + gamma : float, optional + If specified, then use this gamma to compute the general prior form, + :math:`p \sim {\tt gamma}^{N_{\rm blocks}}`. If gamma is specified, p0 + is ignored. ncp_prior : float, optional If specified, use the value of ``ncp_prior`` to compute the prior as above, using the definition :math:`{\tt ncp\_prior} = -\ln({\tt @@ -517,14 +573,24 @@ class PointMeasures(FitnessFunc): ignored. """ - def __init__(self, p0=0.05, gamma=None, ncp_prior=None): + def __init__( + self, + p0: float | None = 0.05, + gamma: float | None = None, + ncp_prior: float | None = None, + ) -> None: super().__init__(p0, gamma, ncp_prior) - def fitness(self, a_k, b_k): + def fitness(self, a_k: NDArray[float], b_k: ArrayLike) -> NDArray[float]: # eq. 41 from Scargle 2013 return (b_k * b_k) / (4 * a_k) - def validate_input(self, t, x, sigma): + def validate_input( + self, + t: ArrayLike, + x: ArrayLike | None, + sigma: float | ArrayLike | None, + ) -> tuple[NDArray[float], NDArray[float], NDArray[float]]: if x is None: raise ValueError("x must be specified for point measures") return super().validate_input(t, x, sigma) diff --git a/astropy/stats/biweight.py b/astropy/stats/biweight.py index 4bae752a0f39..add5bc977615 100644 --- a/astropy/stats/biweight.py +++ b/astropy/stats/biweight.py @@ -4,10 +4,21 @@ Tukey's biweight function. 
""" +from __future__ import annotations + +from typing import TYPE_CHECKING + import numpy as np from .funcs import median_absolute_deviation +if TYPE_CHECKING: + from collections.abc import Callable + + from numpy.typing import ArrayLike, NDArray + +# TODO: typing: use a custom-defined 'ArrayLike-but-not-a-scalar' type for `float | ArrayLike` or `ArrayLike | float` hints + __all__ = [ "biweight_location", "biweight_scale", @@ -17,7 +28,11 @@ ] -def _stat_functions(data, ignore_nan=False): +def _stat_functions( + data: ArrayLike, + ignore_nan: bool | None = False, +) -> tuple[Callable[..., NDArray[float]], Callable[..., NDArray[float]]]: + # TODO: typing: update return Callables with custom callback protocol (https://mypy.readthedocs.io/en/stable/protocols.html#callback-protocols) if isinstance(data, np.ma.MaskedArray): median_func = np.ma.median sum_func = np.ma.sum @@ -31,7 +46,14 @@ def _stat_functions(data, ignore_nan=False): return median_func, sum_func -def biweight_location(data, c=6.0, M=None, axis=None, *, ignore_nan=False): +def biweight_location( + data: ArrayLike, + c: float | None = 6.0, + M: float | ArrayLike | None = None, + axis: int | tuple[int, ...] | None = None, + *, + ignore_nan: bool | None = False, +) -> float | NDArray[float]: r""" Compute the biweight location. @@ -73,7 +95,7 @@ def biweight_location(data, c=6.0, M=None, axis=None, *, ignore_nan=False): ``axis`` of the input array. If `None` (default), then the median of the input array will be used (or along each ``axis``, if specified). - axis : None, int, or tuple of int, optional + axis : int or tuple of int, optional The axis or axes along which the biweight locations are computed. If `None` (default), then the biweight location of the flattened input array will be computed. 
@@ -160,8 +182,14 @@ def biweight_location(data, c=6.0, M=None, axis=None, *, ignore_nan=False): def biweight_scale( - data, c=9.0, M=None, axis=None, modify_sample_size=False, *, ignore_nan=False -): + data: ArrayLike, + c: float | None = 9.0, + M: float | ArrayLike | None = None, + axis: int | tuple[int, ...] | None = None, + modify_sample_size: bool | None = False, + *, + ignore_nan: bool | None = False, +) -> float | NDArray[float]: r""" Compute the biweight scale. @@ -222,7 +250,7 @@ def biweight_scale( containing the location estimate along each ``axis`` of the input array. If `None` (default), then the median of the input array will be used (or along each ``axis``, if specified). - axis : None, int, or tuple of int, optional + axis : int or tuple of int, optional The axis or axes along which the biweight scales are computed. If `None` (default), then the biweight scale of the flattened input array will be computed. @@ -280,8 +308,14 @@ def biweight_scale( def biweight_midvariance( - data, c=9.0, M=None, axis=None, modify_sample_size=False, *, ignore_nan=False -): + data: ArrayLike, + c: float | None = 9.0, + M: float | ArrayLike | None = None, + axis: int | tuple[int, ...] | None = None, + modify_sample_size: bool | None = False, + *, + ignore_nan: bool | None = False, +) -> float | NDArray[float]: r""" Compute the biweight midvariance. @@ -341,7 +375,7 @@ def biweight_midvariance( containing the location estimate along each ``axis`` of the input array. If `None` (default), then the median of the input array will be used (or along each ``axis``, if specified). - axis : None, int, or tuple of int, optional + axis : int or tuple of int, optional The axis or axes along which the biweight midvariances are computed. If `None` (default), then the biweight midvariance of the flattened input array will be computed. 
@@ -456,7 +490,12 @@ def biweight_midvariance( return where_func(mad.squeeze() == 0, 0.0, value) -def biweight_midcovariance(data, c=9.0, M=None, modify_sample_size=False): +def biweight_midcovariance( + data: ArrayLike, + c: float | None = 9.0, + M: float | ArrayLike | None = None, + modify_sample_size: bool | None = False, +) -> NDArray[float]: r""" Compute the biweight midcovariance between pairs of multiple variables. @@ -667,7 +706,13 @@ def biweight_midcovariance(data, c=9.0, M=None, modify_sample_size=False): return value -def biweight_midcorrelation(x, y, c=9.0, M=None, modify_sample_size=False): +def biweight_midcorrelation( + x: ArrayLike, + y: ArrayLike, + c: float | None = 9.0, + M: float | ArrayLike | None = None, + modify_sample_size: bool | None = False, +) -> float: r""" Compute the biweight midcorrelation between two variables. diff --git a/astropy/stats/circstats.py b/astropy/stats/circstats.py index 6a33c7c323ca..e997cae04c5c 100644 --- a/astropy/stats/circstats.py +++ b/astropy/stats/circstats.py @@ -10,10 +10,17 @@ 'CircStats' [2]_. """ +from __future__ import annotations + +from typing import TYPE_CHECKING + import numpy as np from astropy.units import Quantity +if TYPE_CHECKING: + from numpy.typing import NDArray + __all__ = [ "circmean", "circstd", @@ -27,7 +34,13 @@ __doctest_requires__ = {"vtest": ["scipy"]} -def _components(data, p=1, phi=0.0, axis=None, weights=None): +def _components( + data: NDArray | Quantity, + p: float | None = 1.0, + phi: float | NDArray | Quantity = 0.0, + axis: int | None = None, + weights: NDArray | None = None, +) -> NDArray | Quantity: # Utility function for computing the generalized rectangular components # of the circular data. 
if weights is None: @@ -43,7 +56,13 @@ def _components(data, p=1, phi=0.0, axis=None, weights=None): return C, S -def _angle(data, p=1, phi=0.0, axis=None, weights=None): +def _angle( + data: NDArray | Quantity, + p: float | None = 1.0, + phi: float | NDArray | Quantity = 0.0, + axis: int | None = None, + weights: NDArray | None = None, +) -> NDArray | Quantity: # Utility function for computing the generalized sample mean angle C, S = _components(data, p, phi, axis, weights) @@ -57,13 +76,23 @@ def _angle(data, p=1, phi=0.0, axis=None, weights=None): return theta -def _length(data, p=1, phi=0.0, axis=None, weights=None): +def _length( + data: NDArray | Quantity, + p: float | None = 1.0, + phi: float | NDArray | Quantity = 0.0, + axis: int | None = None, + weights: NDArray | None = None, +) -> NDArray | Quantity: # Utility function for computing the generalized sample length C, S = _components(data, p, phi, axis, weights) return np.hypot(S, C) -def circmean(data, axis=None, weights=None): +def circmean( + data: NDArray | Quantity, + axis: int | None = None, + weights: NDArray | None = None, +) -> NDArray | Quantity: """Computes the circular mean angle of an array of circular data. Parameters @@ -102,10 +131,14 @@ def circmean(data, axis=None, weights=None): Circular Statistics (2001)'". 2015. """ - return _angle(data, 1, 0.0, axis, weights) + return _angle(data, 1.0, 0.0, axis, weights) -def circvar(data, axis=None, weights=None): +def circvar( + data: NDArray | Quantity, + axis: int | None = None, + weights: NDArray | None = None, +) -> NDArray | Quantity: """Computes the circular variance of an array of circular data. There are some concepts for defining measures of dispersion for circular @@ -157,10 +190,15 @@ def circvar(data, axis=None, weights=None): ``scipy.stats.cirvar`` uses a definition consistent with this implementation. 
""" - return 1.0 - _length(data, 1, 0.0, axis, weights) + return 1.0 - _length(data, 1.0, 0.0, axis, weights) -def circstd(data, axis=None, weights=None, method="angular"): +def circstd( + data: NDArray | Quantity, + axis: int | None = None, + weights: NDArray | None = None, + method: str | None = "angular", +) -> NDArray | Quantity: """Computes the circular standard deviation of an array of circular data. The standard deviation implemented here is based on the definitions given @@ -233,12 +271,18 @@ def circstd(data, axis=None, weights=None, method="angular"): raise ValueError("method should be either 'angular' or 'circular'") if method == "angular": - return np.sqrt(2.0 * (1.0 - _length(data, 1, 0.0, axis, weights))) + return np.sqrt(2.0 * (1.0 - _length(data, 1.0, 0.0, axis, weights))) else: - return np.sqrt(-2.0 * np.log(_length(data, 1, 0.0, axis, weights))) + return np.sqrt(-2.0 * np.log(_length(data, 1.0, 0.0, axis, weights))) -def circmoment(data, p=1.0, centered=False, axis=None, weights=None): +def circmoment( + data: NDArray | Quantity, + p: float | None = 1.0, + centered: bool | None = False, + axis: int | None = None, + weights: NDArray | None = None, +) -> NDArray | Quantity: """Computes the ``p``-th trigonometric circular moment for an array of circular data. @@ -292,7 +336,13 @@ def circmoment(data, p=1.0, centered=False, axis=None, weights=None): return _angle(data, p, phi, axis, weights), _length(data, p, phi, axis, weights) -def circcorrcoef(alpha, beta, axis=None, weights_alpha=None, weights_beta=None): +def circcorrcoef( + alpha: NDArray | Quantity, + beta: NDArray | Quantity, + axis: int | None = None, + weights_alpha: NDArray | None = None, + weights_beta: NDArray | None = None, +) -> NDArray | Quantity: """Computes the circular correlation coefficient between two array of circular data. 
@@ -355,7 +405,11 @@ def circcorrcoef(alpha, beta, axis=None, weights_alpha=None, weights_beta=None): return rho -def rayleightest(data, axis=None, weights=None): +def rayleightest( + data: NDArray | Quantity, + axis: int | None = None, + weights: NDArray | None = None, +) -> float | Quantity: """Performs the Rayleigh test of uniformity. This test is used to identify a non-uniform distribution, i.e. it is @@ -407,7 +461,7 @@ def rayleightest(data, axis=None, weights=None): """ n = np.size(data, axis=axis) - Rbar = _length(data, 1, 0.0, axis, weights) + Rbar = _length(data, 1.0, 0.0, axis, weights) z = n * Rbar * Rbar # see [3] and [4] for the formulae below @@ -424,7 +478,12 @@ def rayleightest(data, axis=None, weights=None): return p_value -def vtest(data, mu=0.0, axis=None, weights=None): +def vtest( + data: NDArray | Quantity, + mu: float | Quantity | None = 0.0, + axis: int | None = None, + weights: NDArray | None = None, +) -> float | Quantity: """Performs the Rayleigh test of uniformity where the alternative hypothesis H1 is assumed to have a known mean angle ``mu``. @@ -493,7 +552,7 @@ def vtest(data, mu=0.0, axis=None, weights=None): return p_value -def _A1inv(x): +def _A1inv(x: NDArray | Quantity) -> NDArray | Quantity: # Approximation for _A1inv(x) according R Package 'CircStats' # See http://www.scienceasia.org/2012.38.n1/scias38_118.pdf, equation (4) @@ -510,7 +569,11 @@ def _A1inv(x): return kappa1 + kappa2 + kappa3 -def vonmisesmle(data, axis=None, weights=None): +def vonmisesmle( + data: NDArray | Quantity, + axis: int | None = None, + weights: NDArray | None = None, +) -> tuple[float | Quantity, float | Quantity]: """Computes the Maximum Likelihood Estimator (MLE) for the parameters of the von Mises distribution. 
@@ -553,5 +616,5 @@ def vonmisesmle(data, axis=None, weights=None): """ mu = circmean(data, axis=axis, weights=weights) - kappa = _A1inv(_length(data, p=1, phi=0.0, axis=axis, weights=weights)) + kappa = _A1inv(_length(data, p=1.0, phi=0.0, axis=axis, weights=weights)) return mu, kappa diff --git a/astropy/stats/funcs.py b/astropy/stats/funcs.py index 0325b4448662..8e0e4b28608b 100644 --- a/astropy/stats/funcs.py +++ b/astropy/stats/funcs.py @@ -9,7 +9,10 @@ should be used for access. """ +from __future__ import annotations + import math +from typing import TYPE_CHECKING import numpy as np @@ -17,6 +20,15 @@ from . import _stats +if TYPE_CHECKING: + from collections.abc import Callable + from typing import Literal, SupportsFloat, TypeVar + + from numpy.typing import ArrayLike, NDArray + + # type for variables generated with the mpmath library + FloatLike = TypeVar("FloatLike", bound=SupportsFloat) + __all__ = [ "gaussian_fwhm_to_sigma", "gaussian_sigma_to_fwhm", @@ -56,7 +68,12 @@ """ -def binom_conf_interval(k, n, confidence_level=0.68269, interval="wilson"): +def binom_conf_interval( + k: int | NDArray, + n: int | NDArray, + confidence_level: float | None = 0.68269, + interval: Literal["wilson", "jeffreys", "flat", "wald"] = "wilson", +) -> NDArray: r"""Binomial proportion confidence interval given k successes, n trials. @@ -286,8 +303,13 @@ def binom_conf_interval(k, n, confidence_level=0.68269, interval="wilson"): def binned_binom_proportion( - x, success, bins=10, range=None, confidence_level=0.68269, interval="wilson" -): + x: ArrayLike, + success: ArrayLike, + bins: int | ArrayLike = 10, + range: tuple[float, float] | None = None, + confidence_level: float | None = 0.68269, + interval: Literal["wilson", "jeffreys", "flat", "wald"] = "wilson", +) -> tuple[NDArray, NDArray, NDArray, NDArray]: """Binomial proportion and confidence interval in bins of a continuous variable ``x``. 
@@ -468,7 +490,19 @@ def true_efficiency(x): return bin_ctr, bin_halfwidth, p, perr -def _check_poisson_conf_inputs(sigma, background, confidence_level, name): +def _check_poisson_conf_inputs( + sigma: float, + background: float, + confidence_level: float, + name: Literal[ + "root-n", + "root-n-0", + "pearson", + "sherpagehrels", + "frequentist-confidence", + "kraft-burrows-nousek", + ], +) -> None: if sigma != 1: raise ValueError(f"Only sigma=1 supported for interval {name}") if background != 0: @@ -478,8 +512,19 @@ def _check_poisson_conf_inputs(sigma, background, confidence_level, name): def poisson_conf_interval( - n, interval="root-n", sigma=1, background=0, confidence_level=None -): + n: int | NDArray, + interval: Literal[ + "root-n", + "root-n-0", + "pearson", + "sherpagehrels", + "frequentist-confidence", + "kraft-burrows-nousek", + ] = "root-n", + sigma: float | None = 1.0, + background: float | None = 0.0, + confidence_level: float | None = None, +) -> NDArray: r"""Poisson parameter confidence interval given observed counts. Parameters @@ -746,7 +791,12 @@ def poisson_conf_interval( return conf_interval -def median_absolute_deviation(data, axis=None, func=None, ignore_nan=False): +def median_absolute_deviation( + data: ArrayLike, + axis: int | tuple[int, ...] | None = None, + func: Callable | None = None, + ignore_nan: bool | None = False, +) -> float | NDArray: """ Calculate the median absolute deviation (MAD). @@ -762,7 +812,7 @@ def median_absolute_deviation(data, axis=None, func=None, ignore_nan=False): func : callable, optional The function used to compute the median. Defaults to `numpy.ma.median` for masked arrays, otherwise to `numpy.median`. - ignore_nan : bool + ignore_nan : bool, optional Ignore NaN values (treat them as if they are not in the array) when computing the median. 
This will use `numpy.ma.median` if ``axis`` is specified, or `numpy.nanmedian` if ``axis==None`` and numpy's version @@ -833,7 +883,12 @@ def median_absolute_deviation(data, axis=None, func=None, ignore_nan=False): return result -def mad_std(data, axis=None, func=None, ignore_nan=False): +def mad_std( + data: ArrayLike, + axis: int | tuple[int, ...] | None = None, + func: Callable | None = None, + ignore_nan: bool | None = False, +) -> float | NDArray: r""" Calculate a robust standard deviation using the `median absolute deviation (MAD) @@ -860,7 +915,7 @@ def mad_std(data, axis=None, func=None, ignore_nan=False): func : callable, optional The function used to compute the median. Defaults to `numpy.ma.median` for masked arrays, otherwise to `numpy.median`. - ignore_nan : bool + ignore_nan : bool, optional Ignore NaN values (treat them as if they are not in the array) when computing the median. This will use `numpy.ma.median` if ``axis`` is specified, or `numpy.nanmedian` if ``axis=None`` and numpy's version is @@ -891,7 +946,15 @@ def mad_std(data, axis=None, func=None, ignore_nan=False): return MAD * 1.482602218505602 -def signal_to_noise_oir_ccd(t, source_eps, sky_eps, dark_eps, rd, npix, gain=1.0): +def signal_to_noise_oir_ccd( + t: float | NDArray, + source_eps: float, + sky_eps: float, + dark_eps: float, + rd: float, + npix: float, + gain: float | None = 1.0, +) -> float | NDArray: """Computes the signal to noise ratio for source being observed in the optical/IR using a CCD. @@ -934,7 +997,12 @@ def signal_to_noise_oir_ccd(t, source_eps, sky_eps, dark_eps, rd, npix, gain=1.0 return signal / noise -def bootstrap(data, bootnum=100, samples=None, bootfunc=None): +def bootstrap( + data: NDArray, + bootnum: int | None = 100, + samples: int | None = None, + bootfunc: Callable | None = None, +) -> NDArray: """Performs bootstrap resampling on numpy arrays. 
Bootstrap resampling is used to understand confidence intervals of sample @@ -1047,7 +1115,7 @@ def bootstrap(data, bootnum=100, samples=None, bootfunc=None): return boot -def _scipy_kraft_burrows_nousek(N, B, CL): +def _scipy_kraft_burrows_nousek(N: int, B: float, CL: float) -> tuple[float, float]: """Upper limit on a poisson count rate. The implementation is based on Kraft, Burrows and Nousek @@ -1082,7 +1150,7 @@ def _scipy_kraft_burrows_nousek(N, B, CL): from scipy.optimize import brentq from scipy.special import factorial - def eqn8(N, B): + def eqn8(N: int, B: float) -> float: n = np.arange(N + 1, dtype=np.float64) return 1.0 / (exp(-B) * np.sum(np.power(B, n) / factorial(n))) @@ -1093,14 +1161,14 @@ def eqn8(N, B): eqn8_res = eqn8(N, B) factorial_N = float(math.factorial(N)) - def eqn7(S, N, B): + def eqn7(S: float, N: int, B: float) -> float: SpB = S + B return eqn8_res * (exp(-SpB) * SpB**N / factorial_N) - def eqn9_left(S_min, S_max, N, B): + def eqn9_left(S_min: float, S_max: float, N: int, B: float) -> tuple[float, float]: return quad(eqn7, S_min, S_max, args=(N, B), limit=500) - def find_s_min(S_max, N, B): + def find_s_min(S_max: float, N: int, B: float) -> float: """ Kraft, Burrows and Nousek suggest to integrate from N-B in both directions at once, so that S_min and S_max move similarly (see @@ -1115,7 +1183,7 @@ def find_s_min(S_max, N, B): else: return brentq(lambda x: eqn7(x, N, B) - y_S_max, 0, N - B) - def func(s): + def func(s: float) -> float: s_min = find_s_min(s, N, B) out = eqn9_left(s_min, s, N, B) return out[0] - CL @@ -1125,7 +1193,7 @@ def func(s): return S_min, S_max -def _mpmath_kraft_burrows_nousek(N, B, CL): +def _mpmath_kraft_burrows_nousek(N: int, B: float, CL: float) -> tuple[float, float]: """Upper limit on a poisson count rate. 
The implementation is based on Kraft, Burrows and Nousek in @@ -1162,24 +1230,26 @@ def _mpmath_kraft_burrows_nousek(N, B, CL): CL = mpf(float(CL)) tol = 1e-4 - def eqn8(N, B): + def eqn8(N: FloatLike, B: FloatLike) -> FloatLike: sumterms = [power(B, n) / factorial(n) for n in range(int(N) + 1)] return 1.0 / (exp(-B) * fsum(sumterms)) eqn8_res = eqn8(N, B) factorial_N = factorial(N) - def eqn7(S, N, B): + def eqn7(S: FloatLike, N: FloatLike, B: FloatLike) -> FloatLike: SpB = S + B return eqn8_res * (exp(-SpB) * SpB**N / factorial_N) - def eqn9_left(S_min, S_max, N, B): - def eqn7NB(S): + def eqn9_left( + S_min: FloatLike, S_max: FloatLike, N: FloatLike, B: FloatLike + ) -> FloatLike: + def eqn7NB(S: FloatLike) -> FloatLike: return eqn7(S, N, B) return quad(eqn7NB, [S_min, S_max]) - def find_s_min(S_max, N, B): + def find_s_min(S_max: FloatLike, N: FloatLike, B: FloatLike) -> FloatLike: """ Kraft, Burrows and Nousek suggest to integrate from N-B in both directions at once, so that S_min and S_max move similarly (see @@ -1198,12 +1268,12 @@ def find_s_min(S_max, N, B): return 0.0 else: - def eqn7ysmax(x): + def eqn7ysmax(x: FloatLike) -> FloatLike: return eqn7(x, N, B) - y_S_max return findroot(eqn7ysmax, [0.0, N - B], solver="ridder", tol=tol) - def func(s): + def func(s: FloatLike) -> FloatLike: s_min = find_s_min(s, N, B) out = eqn9_left(s_min, s, N, B) return out - CL @@ -1220,7 +1290,7 @@ def func(s): return float(S_min), float(S_max) -def _kraft_burrows_nousek(N, B, CL): +def _kraft_burrows_nousek(N: int, B: float, CL: float) -> tuple[float, float]: """Upper limit on a poisson count rate. The implementation is based on Kraft, Burrows and Nousek in @@ -1261,7 +1331,7 @@ def _kraft_burrows_nousek(N, B, CL): raise ImportError("Either scipy or mpmath are required.") -def kuiper_false_positive_probability(D, N): +def kuiper_false_positive_probability(D: float, N: float) -> float: """Compute the false positive probability for the Kuiper statistic. 
Uses the set of four formulas described in Paltani 2004; they report @@ -1347,7 +1417,11 @@ def kuiper_false_positive_probability(D, N): return S1 - 8 * D / 3 * S2 -def kuiper(data, cdf=lambda x: x, args=()): +def kuiper( + data: ArrayLike, + cdf: Callable = lambda x: x, + args: tuple | list | None = (), +) -> tuple[float, float]: """Compute the Kuiper statistic. Use the Kuiper statistic version of the Kolmogorov-Smirnov test to @@ -1420,7 +1494,7 @@ def kuiper(data, cdf=lambda x: x, args=()): return D, kuiper_false_positive_probability(D, N) -def kuiper_two(data1, data2): +def kuiper_two(data1: ArrayLike, data2: ArrayLike) -> tuple[float, float]: """Compute the Kuiper statistic to compare two samples. Parameters @@ -1460,7 +1534,9 @@ def kuiper_two(data1, data2): return D, kuiper_false_positive_probability(D, Ne) -def fold_intervals(intervals): +def fold_intervals( + intervals: list[tuple[float, float, float]], +) -> tuple[NDArray[float], NDArray[float]]: """Fold the weighted intervals to the interval (0,1). Convert a list of intervals (ai, bi, wi) to a list of non-overlapping @@ -1512,7 +1588,7 @@ def fold_intervals(intervals): return np.array(breaks), totals -def cdf_from_intervals(breaks, totals): +def cdf_from_intervals(breaks: NDArray[float], totals: NDArray[float]) -> Callable: """Construct a callable piecewise-linear CDF from a pair of arrays. Take a pair of arrays in the format returned by fold_intervals and @@ -1547,7 +1623,7 @@ def cdf_from_intervals(breaks, totals): return lambda x: np.interp(x, b, c, 0, 1) -def interval_overlap_length(i1, i2): +def interval_overlap_length(i1: tuple[float, float], i2: tuple[float, float]) -> float: """Compute the length of overlap of two intervals. 
Parameters @@ -1579,7 +1655,9 @@ def interval_overlap_length(i1, i2): return 0 -def histogram_intervals(n, breaks, totals): +def histogram_intervals( + n: int, breaks: NDArray[float], totals: NDArray[float] +) -> NDArray[float]: """Histogram of a piecewise-constant weight function. This function takes a piecewise-constant weight function and diff --git a/astropy/stats/histogram.py b/astropy/stats/histogram.py index c2e617c0d545..8a9e274314d8 100644 --- a/astropy/stats/histogram.py +++ b/astropy/stats/histogram.py @@ -6,10 +6,19 @@ Ported from the astroML project: https://www.astroml.org/ """ +from __future__ import annotations + +from typing import TYPE_CHECKING + import numpy as np from .bayesian_blocks import bayesian_blocks +if TYPE_CHECKING: + from typing import Literal + + from numpy.typing import ArrayLike, NDArray + __all__ = [ "histogram", "scott_bin_width", @@ -19,7 +28,15 @@ ] -def calculate_bin_edges(a, bins=10, range=None, weights=None): +def calculate_bin_edges( + a: ArrayLike, + bins: int + | list[int | float] + | Literal["blocks", "knuth", "scott", "freedman"] + | None = 10, + range: tuple[int | float, int | float] | None = None, + weights: ArrayLike | None = None, +) -> NDArray[float]: """ Calculate histogram bin edges like ``numpy.histogram_bin_edges``. @@ -44,6 +61,11 @@ def calculate_bin_edges(a, bins=10, range=None, weights=None): the value of the weight corresponding to ``a`` instead of returning the count of values. This argument does not affect determination of bin edges, though they may be used in the future as new methods are added. 
+ + Returns + ------- + bins : ndarray + Histogram bin edges """ # if range is specified, we need to truncate the data for # the bin-finding routines @@ -89,7 +111,16 @@ def calculate_bin_edges(a, bins=10, range=None, weights=None): return bins -def histogram(a, bins=10, range=None, weights=None, **kwargs): +def histogram( + a: ArrayLike, + bins: int + | list[int | float] + | Literal["blocks", "knuth", "scott", "freedman"] + | None = 10, + range: tuple[int | float, int | float] | None = None, + weights: ArrayLike | None = None, + **kwargs, +) -> tuple[NDArray, NDArray]: """Enhanced histogram function, providing adaptive binnings. This is a histogram function that enables the use of more sophisticated @@ -143,7 +174,10 @@ def histogram(a, bins=10, range=None, weights=None, **kwargs): return np.histogram(a, bins=bins, range=range, weights=weights, **kwargs) -def scott_bin_width(data, return_bins=False): +def scott_bin_width( + data: ArrayLike, + return_bins: bool | None = False, +) -> float | tuple[float, NDArray]: r"""Return the optimal histogram bin width using Scott's rule. Scott's rule is a normal reference rule: it minimizes the integrated @@ -204,7 +238,10 @@ def scott_bin_width(data, return_bins=False): return dx -def freedman_bin_width(data, return_bins=False): +def freedman_bin_width( + data: ArrayLike, + return_bins: bool | None = False, +) -> float | tuple[float, NDArray]: r"""Return the optimal histogram bin width using the Freedman-Diaconis rule. The Freedman-Diaconis rule is a normal reference rule like Scott's @@ -279,7 +316,11 @@ def freedman_bin_width(data, return_bins=False): return dx -def knuth_bin_width(data, return_bins=False, quiet=True): +def knuth_bin_width( + data: ArrayLike, + return_bins: bool | None = False, + quiet: bool | None = True, +) -> float | tuple[float, NDArray]: r"""Return the optimal histogram bin width using Knuth's rule. 
Knuth's rule is a fixed-width, Bayesian approach to determining @@ -368,7 +409,7 @@ class _KnuthF: knuth_bin_width """ - def __init__(self, data): + def __init__(self, data: ArrayLike) -> None: self.data = np.array(data, copy=True) if self.data.ndim != 1: raise ValueError("data should be 1-dimensional") @@ -383,14 +424,14 @@ def __init__(self, data): # create a reference to gammaln to use in self.eval() self.gammaln = special.gammaln - def bins(self, M): + def bins(self, M: int) -> NDArray: """Return the bin edges given M number of bins.""" return np.linspace(self.data[0], self.data[-1], int(M) + 1) - def __call__(self, M): + def __call__(self, M: int) -> float: return self.eval(M) - def eval(self, M): + def eval(self, M: int) -> float: """Evaluate the Knuth function. Parameters diff --git a/astropy/stats/info_theory.py b/astropy/stats/info_theory.py index 7a3d21b75f16..d761b7ed3880 100644 --- a/astropy/stats/info_theory.py +++ b/astropy/stats/info_theory.py @@ -4,6 +4,8 @@ This module contains simple functions for model selection. """ +from __future__ import annotations + import numpy as np __all__ = [ @@ -19,7 +21,11 @@ } -def bayesian_info_criterion(log_likelihood, n_params, n_samples): +def bayesian_info_criterion( + log_likelihood: float, + n_params: int, + n_samples: int, +) -> float: r"""Computes the Bayesian Information Criterion (BIC) given the log of the likelihood function evaluated at the estimated (or analytically derived) parameters, the number of parameters, and the number of samples. @@ -119,7 +125,11 @@ def bayesian_info_criterion(log_likelihood, n_params, n_samples): # NOTE: bic_t - bic_g doctest is skipped because it produced slightly # different result in arm64 and big-endian s390x CI jobs. 
-def bayesian_info_criterion_lsq(ssr, n_params, n_samples): +def bayesian_info_criterion_lsq( + ssr: float, + n_params: int, + n_samples: int, +) -> float: r""" Computes the Bayesian Information Criterion (BIC) assuming that the observations come from a Gaussian distribution. @@ -205,7 +215,11 @@ def bayesian_info_criterion_lsq(ssr, n_params, n_samples): ) -def akaike_info_criterion(log_likelihood, n_params, n_samples): +def akaike_info_criterion( + log_likelihood: float, + n_params: int, + n_samples: int, +) -> float: r""" Computes the Akaike Information Criterion (AIC). @@ -310,7 +324,11 @@ def akaike_info_criterion(log_likelihood, n_params, n_samples): return aic -def akaike_info_criterion_lsq(ssr, n_params, n_samples): +def akaike_info_criterion_lsq( + ssr: float, + n_params: int, + n_samples: int, +) -> float: r""" Computes the Akaike Information Criterion assuming that the observations are Gaussian distributed. diff --git a/astropy/stats/jackknife.py b/astropy/stats/jackknife.py index d789551479aa..fbc303e79685 100644 --- a/astropy/stats/jackknife.py +++ b/astropy/stats/jackknife.py @@ -1,12 +1,24 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst +from __future__ import annotations + +from typing import TYPE_CHECKING + import numpy as np +if TYPE_CHECKING: + from collections.abc import Callable + from typing import TypeVar + + from numpy.typing import NDArray + + DT = TypeVar("DT", bound=np.generic) + __all__ = ["jackknife_resampling", "jackknife_stats"] __doctest_requires__ = {"jackknife_stats": ["scipy"]} -def jackknife_resampling(data): +def jackknife_resampling(data: NDArray[DT]) -> NDArray[DT]: """Performs jackknife resampling on numpy arrays. 
Jackknife resampling is a technique to generate 'n' deterministic samples @@ -51,7 +63,11 @@ def jackknife_resampling(data): return resamples -def jackknife_stats(data, statistic, confidence_level=0.95): +def jackknife_stats( + data: NDArray, + statistic: Callable, + confidence_level: float | None = 0.95, +) -> tuple[float | NDArray, float | NDArray, float | NDArray, NDArray]: """Performs jackknife estimation on the basis of jackknife resamples. This function requires `SciPy `_ to be installed. diff --git a/astropy/stats/setup_package.py b/astropy/stats/setup_package.py index e5aff4db91a0..456dcfa464b8 100644 --- a/astropy/stats/setup_package.py +++ b/astropy/stats/setup_package.py @@ -1,5 +1,7 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst +from __future__ import annotations + import os from numpy import get_include as get_numpy_include @@ -10,7 +12,7 @@ SRCFILES = [os.path.join(ROOT, "src", srcfile) for srcfile in SRCFILES] -def get_extensions(): +def get_extensions() -> list[Extension]: _sigma_clip_ext = Extension( name="astropy.stats._fast_sigma_clip", define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], diff --git a/astropy/stats/sigma_clipping.py b/astropy/stats/sigma_clipping.py index 3b74536b294a..56406863e7ca 100644 --- a/astropy/stats/sigma_clipping.py +++ b/astropy/stats/sigma_clipping.py @@ -1,7 +1,10 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst +from __future__ import annotations + import functools import warnings +from typing import TYPE_CHECKING import numpy as np @@ -18,13 +21,22 @@ else: from numpy.lib.array_utils import normalize_axis_index +if TYPE_CHECKING: + from collections.abc import Callable + from typing import Literal + + from numpy.typing import ArrayLike, NDArray + __all__ = ["SigmaClip", "sigma_clip", "sigma_clipped_stats"] if HAS_BOTTLENECK: import bottleneck - def _move_tuple_axes_first(array, axis): + def _move_tuple_axes_first( + array: ArrayLike, + axis: tuple[int, 
...] | None = None, + ) -> ArrayLike: """ Bottleneck can only take integer axis, not tuple, so this function takes all the axes to be operated on and combines them into the @@ -49,7 +61,12 @@ def _move_tuple_axes_first(array, axis): return array_new - def _apply_bottleneck(function, array, axis=None, **kwargs): + def _apply_bottleneck( + function: Callable, + array: ArrayLike, + axis: int | tuple[int, ...] | None = None, + **kwargs, + ) -> float | NDArray | Quantity: """Wrap bottleneck function to handle tuple axis. Also takes care to ensure the output is of the expected type, @@ -78,7 +95,10 @@ def _apply_bottleneck(function, array, axis=None, **kwargs): _nanstd = np.nanstd -def _nanmadstd(array, axis=None): +def _nanmadstd( + array: ArrayLike, + axis: int | tuple[int, ...] | None = None, +) -> float | NDArray: """mad_std function that ignores NaNs by default.""" return mad_std(array, axis=axis, ignore_nan=True) @@ -218,14 +238,14 @@ class SigmaClip: def __init__( self, - sigma=3.0, - sigma_lower=None, - sigma_upper=None, - maxiters=5, - cenfunc="median", - stdfunc="std", - grow=False, - ): + sigma: float | None = 3.0, + sigma_lower: float | None = None, + sigma_upper: float | None = None, + maxiters: int | None = 5, + cenfunc: Literal["median", "mean"] | Callable | None = "median", + stdfunc: Literal["std", "mad_std"] | Callable | None = "std", + grow: float | Literal[False] | None = False, + ) -> None: self.sigma = sigma self.sigma_lower = sigma_lower or sigma self.sigma_upper = sigma_upper or sigma @@ -246,14 +266,14 @@ def __init__( self._binary_dilation = binary_dilation - def __repr__(self): + def __repr__(self) -> str: return ( f"SigmaClip(sigma={self.sigma}, sigma_lower={self.sigma_lower}," f" sigma_upper={self.sigma_upper}, maxiters={self.maxiters}," f" cenfunc={self.cenfunc!r}, stdfunc={self.stdfunc!r}, grow={self.grow})" ) - def __str__(self): + def __str__(self) -> str: lines = ["<" + self.__class__.__name__ + ">"] attrs = [ "sigma", @@ -269,7 +289,9 
@@ def __str__(self): return "\n".join(lines) @staticmethod - def _parse_cenfunc(cenfunc): + def _parse_cenfunc( + cenfunc: Literal["median", "mean"] | Callable | None, + ) -> Callable | None: if isinstance(cenfunc, str): if cenfunc == "median": cenfunc = _nanmedian @@ -283,7 +305,9 @@ def _parse_cenfunc(cenfunc): return cenfunc @staticmethod - def _parse_stdfunc(stdfunc): + def _parse_stdfunc( + stdfunc: Literal["std", "mad_std"] | Callable | None, + ) -> Callable | None: if isinstance(stdfunc, str): if stdfunc == "std": stdfunc = _nanstd @@ -294,7 +318,11 @@ def _parse_stdfunc(stdfunc): return stdfunc - def _compute_bounds(self, data, axis=None): + def _compute_bounds( + self, + data: ArrayLike, + axis: int | tuple[int, ...] | None = None, + ) -> None: # ignore RuntimeWarning if the array (or along an axis) has only # NaNs with warnings.catch_warnings(): @@ -305,7 +333,17 @@ def _compute_bounds(self, data, axis=None): self._max_value = cen + (std * self.sigma_upper) def _sigmaclip_fast( - self, data, axis=None, masked=True, return_bounds=False, copy=True + self, + data: ArrayLike, + axis: int | tuple[int, ...] | None = None, + masked: bool | None = True, + return_bounds: bool | None = False, + copy: bool | None = True, + ) -> ( + NDArray + | np.ma.MaskedArray + | tuple[NDArray | np.ma.MaskedArray, float, float] + | tuple[NDArray | np.ma.MaskedArray, NDArray, NDArray] ): """ Fast C implementation for simple use cases. @@ -402,7 +440,13 @@ def _sigmaclip_fast( else: return result - def _sigmaclip_noaxis(self, data, masked=True, return_bounds=False, copy=True): + def _sigmaclip_noaxis( + self, + data: ArrayLike, + masked: bool | None = True, + return_bounds: bool | None = False, + copy: bool | None = True, + ) -> NDArray | np.ma.MaskedArray | tuple[NDArray | np.ma.MaskedArray, float, float]: """ Sigma clip when ``axis`` is None and ``grow`` is not >0. 
@@ -455,7 +499,17 @@ def _sigmaclip_noaxis(self, data, masked=True, return_bounds=False, copy=True): return filtered_data def _sigmaclip_withaxis( - self, data, axis=None, masked=True, return_bounds=False, copy=True + self, + data: ArrayLike, + axis: int | tuple[int, ...] | None = None, + masked: bool | None = True, + return_bounds: bool | None = False, + copy: bool | None = True, + ) -> ( + NDArray + | np.ma.MaskedArray + | tuple[NDArray | np.ma.MaskedArray, float, float] + | tuple[NDArray | np.ma.MaskedArray, NDArray, NDArray] ): """ Sigma clip the data when ``axis`` or ``grow`` is specified. @@ -558,7 +612,19 @@ def _sigmaclip_withaxis( else: return filtered_data - def __call__(self, data, axis=None, masked=True, return_bounds=False, copy=True): + def __call__( + self, + data: ArrayLike, + axis: int | tuple[int, ...] | None = None, + masked: bool | None = True, + return_bounds: bool | None = False, + copy: bool | None = True, + ) -> ( + NDArray + | np.ma.MaskedArray + | tuple[NDArray | np.ma.MaskedArray, float, float] + | tuple[NDArray | np.ma.MaskedArray, NDArray, NDArray] + ): """ Perform sigma clipping on the provided data. @@ -668,19 +734,19 @@ def __call__(self, data, axis=None, masked=True, return_bounds=False, copy=True) def sigma_clip( - data, - sigma=3, - sigma_lower=None, - sigma_upper=None, - maxiters=5, - cenfunc="median", - stdfunc="std", - axis=None, - masked=True, - return_bounds=False, - copy=True, - grow=False, -): + data: ArrayLike, + sigma: float | None = 3.0, + sigma_lower: float | None = None, + sigma_upper: float | None = None, + maxiters: int | None = 5, + cenfunc: Literal["median", "mean"] | Callable | None = "median", + stdfunc: Literal["std", "mad_std"] | Callable | None = "std", + axis: int | tuple[int, ...] 
| None = None, + masked: bool | None = True, + return_bounds: bool | None = False, + copy: bool | None = True, + grow: float | Literal[False] | None = False, +) -> ArrayLike | tuple[ArrayLike, float, float] | tuple[ArrayLike, ...]: """ Perform sigma-clipping on the provided data. @@ -877,19 +943,19 @@ def sigma_clip( def sigma_clipped_stats( - data, - mask=None, - mask_value=None, - sigma=3.0, - sigma_lower=None, - sigma_upper=None, - maxiters=5, - cenfunc="median", - stdfunc="std", - std_ddof=0, - axis=None, - grow=False, -): + data: ArrayLike, + mask: NDArray | None = None, + mask_value: float | None = None, + sigma: float | None = 3.0, + sigma_lower: float | None = None, + sigma_upper: float | None = None, + maxiters: int | None = 5, + cenfunc: Literal["median", "mean"] | Callable | None = "median", + stdfunc: Literal["std", "mad_std"] | Callable | None = "std", + std_ddof: int | None = 0, + axis: int | tuple[int, ...] | None = None, + grow: float | Literal[False] | None = False, +) -> tuple[float, float, float]: """ Calculate sigma-clipped statistics on the provided data. diff --git a/astropy/stats/spatial.py b/astropy/stats/spatial.py index 28a1862eddbd..32608148f48b 100644 --- a/astropy/stats/spatial.py +++ b/astropy/stats/spatial.py @@ -3,10 +3,21 @@ This module implements functions and classes for spatial statistics. """ +from __future__ import annotations + import math +from typing import TYPE_CHECKING import numpy as np +if TYPE_CHECKING: + from typing import Literal, TypeAlias + + from numpy.typing import NDArray + + # TODO: consider replacing with `StrEnum` once support for Python 3.10 dropped + _ModeOps: TypeAlias = Literal["none", "translation", "ohser", "var-width", "ripley"] + __all__ = ["RipleysKEstimator"] @@ -57,7 +68,14 @@ class RipleysKEstimator: Point Fields, Akademie Verlag GmbH, Chichester. 
""" - def __init__(self, area, x_max=None, y_max=None, x_min=None, y_min=None): + def __init__( + self, + area: float, + x_max: float | None = None, + y_max: float | None = None, + x_min: float | None = None, + y_min: float | None = None, + ) -> None: self.area = area self.x_max = x_max self.y_max = y_max @@ -65,22 +83,22 @@ def __init__(self, area, x_max=None, y_max=None, x_min=None, y_min=None): self.y_min = y_min @property - def area(self): + def area(self) -> float: return self._area @area.setter - def area(self, value): + def area(self, value: float) -> None: if isinstance(value, (float, int)) and value > 0: self._area = value else: raise ValueError(f"area is expected to be a positive number. Got {value}.") @property - def y_max(self): + def y_max(self) -> float | None: return self._y_max @y_max.setter - def y_max(self, value): + def y_max(self, value: float | None) -> None: if value is None or isinstance(value, (float, int)): self._y_max = value else: @@ -89,11 +107,11 @@ def y_max(self, value): ) @property - def x_max(self): + def x_max(self) -> float | None: return self._x_max @x_max.setter - def x_max(self, value): + def x_max(self, value: float | None) -> None: if value is None or isinstance(value, (float, int)): self._x_max = value else: @@ -102,31 +120,36 @@ def x_max(self, value): ) @property - def y_min(self): + def y_min(self) -> float | None: return self._y_min @y_min.setter - def y_min(self, value): + def y_min(self, value: float | None) -> None: if value is None or isinstance(value, (float, int)): self._y_min = value else: raise ValueError(f"y_min is expected to be a real number. Got {value}.") @property - def x_min(self): + def x_min(self) -> float | None: return self._x_min @x_min.setter - def x_min(self, value): + def x_min(self, value: float | None) -> None: if value is None or isinstance(value, (float, int)): self._x_min = value else: raise ValueError(f"x_min is expected to be a real number. 
Got {value}.") - def __call__(self, data, radii, mode="none"): + def __call__( + self, + data: NDArray[float], + radii: NDArray[float], + mode: _ModeOps = "none", + ) -> NDArray[float]: return self.evaluate(data=data, radii=radii, mode=mode) - def _pairwise_diffs(self, data): + def _pairwise_diffs(self, data: NDArray[float]) -> NDArray[float]: npts = len(data) diff = np.zeros(shape=(npts * (npts - 1) // 2, 2), dtype=np.double) k = 0 @@ -137,7 +160,7 @@ def _pairwise_diffs(self, data): return diff - def poisson(self, radii): + def poisson(self, radii: NDArray[float]) -> NDArray[float]: """ Evaluates the Ripley K function for the homogeneous Poisson process, also known as Complete State of Randomness (CSR). @@ -154,21 +177,36 @@ def poisson(self, radii): """ return np.pi * radii * radii - def Lfunction(self, data, radii, mode="none"): + def Lfunction( + self, + data: NDArray[float], + radii: NDArray[float], + mode: _ModeOps = "none", + ) -> NDArray[float]: """ Evaluates the L function at ``radii``. For parameter description see ``evaluate`` method. """ return np.sqrt(self.evaluate(data, radii, mode=mode) / np.pi) - def Hfunction(self, data, radii, mode="none"): + def Hfunction( + self, + data: NDArray[float], + radii: NDArray[float], + mode: _ModeOps = "none", + ) -> NDArray[float]: """ Evaluates the H function at ``radii``. For parameter description see ``evaluate`` method. """ return self.Lfunction(data, radii, mode=mode) - radii - def evaluate(self, data, radii, mode="none"): + def evaluate( + self, + data: NDArray[float], + radii: NDArray[float], + mode: _ModeOps = "none", + ) -> NDArray[float]: """ Evaluates the Ripley K estimator for a given set of values ``radii``. 
diff --git a/docs/nitpick-exceptions b/docs/nitpick-exceptions index f96025412981..b43d08c5f0a2 100644 --- a/docs/nitpick-exceptions +++ b/docs/nitpick-exceptions @@ -88,6 +88,9 @@ py:class Real py:class np.number # numpy.typing py:class NDArray +py:class ArrayLike +# np.ma +py:class np.ma.MaskedArray # locally defined type variable for ndarray dtype py:class DT diff --git a/docs/whatsnew/7.0.rst b/docs/whatsnew/7.0.rst index ca388cb92a25..797433b079ec 100644 --- a/docs/whatsnew/7.0.rst +++ b/docs/whatsnew/7.0.rst @@ -16,6 +16,7 @@ In particular, this release includes: * :ref:`whatsnew_7_0_quantity_to_string_formatter` * :ref:`whatsnew_7_0_ecsv_meta_default_dict` * :ref:`whatsnew_7_0_contributor_doc_improvement` +* :ref:`whatsnew_7_0_typing_stats` In addition to these major changes, Astropy v7.0 includes a large number of smaller improvements and bug fixes, which are described in the :ref:`changelog`. @@ -95,6 +96,14 @@ addition, the developer documentation was reorganized and simplified where possi improve readability and accessibility. We welcome continued feedback on how to make contributing to Astropy even easier and more enjoyable. +.. _whatsnew_7_0_typing_stats: + +Typing in astropy.stats +======================= + +The ``astropy.stats`` module is now fully typed. This is the first subpackage for +which this is the case. + Full change log ===============