8000 Sphinx support: Add support for RSS (#1934) · python/peps@b8b6424 · GitHub
[go: up one dir, main page]

Skip to content

Commit b8b6424

Browse files
authored
Sphinx support: Add support for RSS (#1934)
See #2, #1385 for context. This is the RSS generation part, building on PR #1930. It contains the logic for generating RSS. This was originally in #1385 and #1565, split out for ease of review.
1 parent 35b0cc1 commit b8b6424

File tree

3 files changed

+153
-1
lines changed

3 files changed

+153
-1
lines changed

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,11 @@ lint:
6060
SPHINX_JOBS=8
6161
SPHINX_BUILD=$(PYTHON) build.py -j $(SPHINX_JOBS)
6262

63-
pages: rss
63+
# TODO replace `rss:` with this when merged & tested
64+
pep_rss:
65+
$(PYTHON) pep_rss_gen.py
66+
67+
pages: pep_rss
6468
$(SPHINX_BUILD) --index-file
6569

6670
sphinx:

pep_rss_gen.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
import datetime
2+
import email.utils
3+
from pathlib import Path
4+
import re
5+
6+
from dateutil import parser
7+
import docutils.frontend
8+
import docutils.nodes
9+
import docutils.parsers.rst
10+
import docutils.utils
11+
from feedgen import entry
12+
from feedgen import feed
13+
14+
15+
# Monkeypatch feedgen.util.formatRFC2822
16+
def _format_rfc_2822(dt: datetime.datetime) -> str:
    """Format *dt* as an RFC 2822 date string with a literal ``GMT`` zone.

    Args:
        dt: The datetime to format. Timezone-aware values in any zone are
            accepted and converted to UTC before formatting.

    Returns:
        A string such as ``"Fri, 01 Jan 2021 12:00:00 GMT"``.

    Raises:
        ValueError: If *dt* is naive (carries no UTC offset).
    """
    # ``format_datetime(..., usegmt=True)`` only accepts datetimes whose tzinfo
    # is exactly ``datetime.timezone.utc``; normalise every aware datetime so
    # other offsets (or other UTC tzinfo implementations) do not raise.
    if dt.utcoffset() is not None:
        dt = dt.astimezone(datetime.timezone.utc)
    return email.utils.format_datetime(dt, usegmt=True)
18+
19+
20+
# Rebind the ``formatRFC2822`` name in both feedgen modules imported above
# (``entry`` and ``feed``) to the local ``_format_rfc_2822`` implementation.
entry.formatRFC2822 = feed.formatRFC2822 = _format_rfc_2822
21+
# Per-file cache of parsed header values, keyed by source path and then by
# header prefix (e.g. "Title:").
line_cache: dict[Path, dict[str, str]] = {}


def first_line_starting_with(full_path: Path, text: str) -> str:
    """Return the value of the first line in *full_path* starting with *text*.

    Only the ``Created:``, ``Title:`` and ``Author:`` prefixes are recognised.
    The file is read at most once: all three values are collected in a single
    pass and stored in ``line_cache`` for later calls.

    Args:
        full_path: Path of the UTF-8 encoded source file to scan.
        text: The header prefix to look up, including the trailing colon.

    Returns:
        The remainder of the matching line with surrounding whitespace
        stripped, or ``""`` if the prefix is unrecognised or not present.

    Raises:
        OSError: If the file cannot be opened (nothing is cached in that case).
    """
    wanted = ("Created:", "Title:", "Author:")

    # Try and retrieve from cache
    headers = line_cache.get(full_path)
    if headers is None:
        # Else read source
        headers = {}
        with full_path.open(encoding="utf-8") as source:
            for line in source:
                for prefix in wanted:
                    # Keep only the first occurrence of each header so later
                    # body lines that happen to share a prefix are ignored.
                    if prefix not in headers and line.startswith(prefix):
                        headers[prefix] = line.removeprefix(prefix).strip()
                        break
                # Once all have been found, stop reading the file
                if len(headers) == len(wanted):
                    break
        # Cache only after a successful read so a failed open is retried.
        line_cache[full_path] = headers
    return headers.get(text, "")
43+
44+
45+
def pep_creation(full_path: Path) -> datetime.datetime:
    """Return the creation date given on a PEP's ``Created:`` header line.

    When the header holds no recognisable date, or the date text cannot be
    parsed, ``datetime.datetime(1900, 1, 1)`` is returned instead so that the
    result can always be compared with other datetimes.
    """
    header_value = first_line_starting_with(full_path, "Created:")
    # A regex is needed because some PEPs editorialize on the Created line
    # (as of Aug 2020 only PEP 102 has additional content there).
    date_match = re.search(r"(\d+[- ][\w\d]+[- ]\d{2,4})", header_value)
    if date_match is None:
        # Some older PEPs have an empty line; being old, they are of no
        # interest here. None would be the natural result, but datetime
        # objects refuse to compare with it.
        return datetime.datetime(1900, 1, 1)
    try:
        return parser.parse(date_match.group(1), dayfirst=True)
    except (ValueError, OverflowError):
        return datetime.datetime(1900, 1, 1)
59+
60+
61+
def parse_rst(text: str) -> docutils.nodes.document:
    """Parse reStructuredText *text* and return the resulting docutils document.

    The document is created with the default settings of the reST parser
    component and is named ``<rst-doc>``.
    """
    parser_class = docutils.parsers.rst.Parser
    option_parser = docutils.frontend.OptionParser(components=(parser_class,))
    default_settings = option_parser.get_default_values()
    document = docutils.utils.new_document('<rst-doc>', settings=default_settings)
    parser_class().parse(text, document)
    return document
68+
69+
70+
def pep_abstract(full_path: Path) -> str:
    """Return the first paragraph of the PEP abstract.

    The paragraph's line breaks are replaced with spaces. An empty string is
    returned when no paragraph is found under an "Abstract" title.
    """
    document = parse_rst(full_path.read_text(encoding="utf-8"))
    for section in document:
        # Only descend into the top-level node whose markup has the title.
        if "<title>Abstract</title>" not in str(section):
            continue
        for element in section:
            if element.tagname == "paragraph":
                return element.astext().strip().replace("\n", " ")
    return ""
79+
80+
81+
def main():
    """Write ``peps.rss`` describing the ten most recently created PEPs.

    PEP sources are looked up next to this script (files matching
    ``pep-????.*``). Each PEP's creation date, title, author and abstract are
    read from its source, and the resulting RSS document is written to
    ``peps.rss`` in the same directory.
    """
    # get the directory with the PEP sources
    pep_dir = Path(__file__).parent

    # get list of peps with creation time (from "Created:" string in pep source)
    # Files whose name does not end in a numeric PEP number are dropped *before*
    # the newest ten are selected, so they cannot shrink the feed.
    peps_with_dt = []
    for path in pep_dir.glob("pep-????.*"):
        try:
            pep_num = int(path.stem.split("-")[-1])
        except ValueError:
            continue
        peps_with_dt.append((pep_creation(path), path, pep_num))
    peps_with_dt.sort()

    # generate rss items for 10 most recent peps
    items = []
    for dt, full_path, pep_num in peps_with_dt[-10:]:
        title = first_line_starting_with(full_path, "Title:")
        author = first_line_starting_with(full_path, "Author:")
        if "@" in author or " at " in author:
            parsed_authors = email.utils.getaddresses([author])
            # ideal would be to pass as a list of dicts with names and emails to
            # item.author, but FeedGen's RSS <author/> output doesn't pass W3C
            # validation (as of 12/06/2021)
            joined_authors = ", ".join(
                f"{name} ({email_address})" for name, email_address in parsed_authors
            )
        else:
            joined_authors = author
        url = f"https://www.python.org/dev/peps/pep-{pep_num:0>4}"

        item = entry.FeedEntry()
        item.title(f"PEP {pep_num}: {title}")
        item.link(href=url)
        item.description(pep_abstract(full_path))
        item.guid(url, permalink=True)
        item.published(dt.replace(tzinfo=datetime.timezone.utc))  # ensure datetime has a timezone
        item.author(email=joined_authors)
        items.append(item)

    # The rss envelope
    # Built explicitly so the text does not depend on source indentation.
    desc = (
        "Newest Python Enhancement Proposals (PEPs) - Information on new "
        "language features, and some meta-information like release "
        "procedure and schedules."
    )

    # Setup feed generator
    fg = feed.FeedGenerator()
    fg.language("en")
    fg.generator("")
    fg.docs("https://cyber.harvard.edu/rss/rss.html")

    # Add metadata
    fg.title("Newest Python PEPs")
    fg.link(href="https://www.python.org/dev/peps")
    fg.link(href="https://www.python.org/dev/peps/peps.rss", rel="self")
    fg.description(desc)
    # Timezone-aware "now"; datetime.utcnow() is deprecated.
    fg.lastBuildDate(datetime.datetime.now(datetime.timezone.utc))

    # Add PEP information. Items are added oldest-to-newest; the original
    # comment says the feed ends up newest first, presumably because feedgen
    # prepends entries by default (TODO confirm against feedgen's add_entry).
    for item in items:
        fg.add_entry(item)

    pep_dir.joinpath("peps.rss").write_bytes(fg.rss_str(pretty=True))


if __name__ == "__main__":
    main()

requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
# Requirements for building PEPs with Sphinx
22
sphinx >= 3.5
33
docutils >= 0.16
4+
5+
# For RSS
6+
feedgen >= 0.9.0 # For RSS feed

0 commit comments

Comments
 (0)
0