-
-
Notifications
You must be signed in to change notification settings - Fork 1.5k
RelationshipToLatest
Gord Thompson edited this page May 2, 2024
·
3 revisions
Apply a correlated subquery to relationship()
in order to retrieve only the latest (or earliest) item of a collection.
import datetime
from sqlalchemy import Column, Date, ForeignKey, Integer, and_, create_engine, func, select
from sqlalchemy.orm import Session, aliased, contains_eager, declarative_base, joinedload, relationship
Base = declarative_base()
class A(Base):
__tablename__ = "a"
id = Column(Integer, primary_key=True)
bs = relationship("B")
latest_b = relationship(
lambda: B,
primaryjoin=lambda: and_(
A.id == B.a_id,
B.date
== select(func.max(B.date))
.where(B.a_id == A.id)
.correlate(A.__table__)
.scalar_subquery(),
),
viewonly=True,
)
class B(Base):
__tablename__ = "b"
id = Column(Integer, primary_key=True)
a_id = Column(Integer, ForeignKey("a.id"))
date = Column(Date)
def __repr__(self):
return f"B(date={repr(self.date)})"
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)
with Session(e) as s:
s.add_all(
[
A(
bs=[
B(date=datetime.date(2011, 10, 5)),
B(date=datetime.date(2011, 8, 4)),
B(date=datetime.date(2011, 9, 17)),
]
),
A(
bs=[
B(date=datetime.date(2012, 2, 2)),
B(date=datetime.date(2012, 3, 3)),
B(date=datetime.date(2012, 4, 4)),
]
),
]
)
s.commit()
for obj in s.scalars(select(A).options(joinedload(A.latest_b))).unique():
print(obj.latest_b)
The SQL we see when using joinedload()
in this way will apply the correlated subquery into the ON clause of the join:
SELECT a.id, b_1.id AS id_1, b_1.a_id, b_1.date
FROM a
LEFT OUTER JOIN b AS b_1 ON
a.id = b_1.a_id AND
b_1.date = (
SELECT max(b_1.date) AS max_1 FROM b AS b_1
WHERE b_1.a_id = a.id
)
We can build SQL with a nicer query plan by instead using contains_eager()
in conjunction with explicit usage of join()
:
subq = (
select(B.a_id, func.max(B.date).label("date"))
.group_by(B.a_id)
.subquery()
)
b_subq = aliased(B, subq)
result = s.scalars(
select(A)
.join(A.bs)
.join(A.bs.of_type(b_subq))
.where(b_subq.date == B.date)
.options(contains_eager(A.latest_b))
).unique()
for obj in result:
print(obj.latest_b)
Instead of a correlated subquery in an ON clause, this produces two separate targets to JOIN on. The set intersection is also easier to see here:
SELECT b.id, b.a_id, b.date, a.id AS id_1
FROM a
JOIN b ON a.id = b.a_id
JOIN (
SELECT b.a_id AS a_id, max(b.date) AS date
FROM b GROUP BY b.a_id
) AS anon_1 ON a.id = anon_1.a_id
WHERE b.date = b.date