8000 RelationshipToLatest · sqlalchemy/sqlalchemy Wiki · GitHub
[go: up one dir, main page]

Skip to content

RelationshipToLatest

Gord Thompson edited this page May 2, 2024 · 3 revisions

RelationshipToLatest

Apply a correlated subquery to relationship() in order to retrieve only the latest (or earliest) item of a collection.

import datetime

from sqlalchemy import Column, Date, ForeignKey, Integer, and_, create_engine, func, select
from sqlalchemy.orm import Session, aliased, contains_eager, declarative_base, joinedload, relationship

Base = declarative_base()


class A(Base):
    __tablename__ = "a"
    id = Column(Integer, primary_key=True)
    bs = relationship("B")

    latest_b = relationship(
        lambda: B,
        primaryjoin=lambda: and_(
            A.id == B.a_id,
            B.date
            == select(func.max(B.date))
            .where(B.a_id == A.id)
            .correlate(A.__table__)
            .scalar_subquery(),
        ),
        viewonly=True,
    )


class B(Base):
    __tablename__ = "b"
    id = Column(Integer, primary_key=True)
    a_id = Column(Integer, ForeignKey("a.id"))
    date = Column(Date)

    def __repr__(self):
        return f"B(date={repr(self.date)})"


e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)

with Session(e) as s:
    s.add_all(
        [
            A(
                bs=[
                    B(date=datetime.date(2011, 10, 5)),
                    B(date=datetime.date(2011, 8, 4)),
                    B(date=datetime.date(2011, 9, 17)),
                ]
            ),
            A(
                bs=[
                    B(date=datetime.date(2012, 2, 2)),
                    B(date=datetime.date(2012, 3, 3)),
                    B(date=datetime.date(2012, 4, 4)),
                ]
            ),
        ]
    )
    s.commit()

    for obj in s.scalars(select(A).options(joinedload(A.latest_b))).unique():
        print(obj.latest_b)

The SQL we see when using joinedload() in this way will apply the correlated subquery into the ON clause of the join:

SELECT a.id, b_1.id AS id_1, b_1.a_id, b_1.date 
    FROM a 
    LEFT OUTER JOIN b AS b_1 ON
        a.id = b_1.a_id AND 
        b_1.date = (
            SELECT max(b_1.date) AS max_1 FROM b AS b_1 
            WHERE b_1.a_id = a.id
            )

We can build SQL with a nicer query plan by instead using contains_eager() in conjunction with explicit usage of join():

    subq = (
        select(B.a_id, func.max(B.date).label("date"))
        .group_by(B.a_id)
        .subquery()
    )
    b_subq = aliased(B, subq)

    result = s.scalars(
        select(A)
        .join(A.bs)
        .join(A.bs.of_type(b_subq))
        .where(b_subq.date == B.date)
        .options(contains_eager(A.latest_b))
    ).unique()
    for obj in result:
        print(obj.latest_b)

Instead of a correlated subquery in an ON clause, this produces two separate targets to JOIN on. The set intersection is also easier to see here:

SELECT b.id, b.a_id, b.date, a.id AS id_1 
FROM a 
JOIN b ON a.id = b.a_id 
JOIN (
    SELECT b.a_id AS a_id, max(b.date) AS date 
        FROM b GROUP BY b.a_id
    ) AS anon_1 ON a.id = anon_1.a_id 
WHERE b.date = b.date
Clone this wiki locally
0