bpo-31325: Fix usage of namedtuple in RobotFileParser.parse() (#4529) · python/cpython@3df02db · GitHub
[go: up one dir, main page]

Skip to content

Commit 3df02db

Browse files
berkerpeksag authored and rhettinger committed
bpo-31325: Fix usage of namedtuple in RobotFileParser.parse() (#4529)
1 parent 0858495 commit 3df02db

File tree

4 files changed

+19
-12
lines changed

4 files changed

+19
-12
lines changed

Doc/library/urllib.robotparser.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,10 @@ structure of :file:`robots.txt` files, see http://www.robotstxt.org/orig.html.
6969
.. method:: request_rate(useragent)
7070

7171
Returns the contents of the ``Request-rate`` parameter from
72-
``robots.txt`` in the form of a :func:`~collections.namedtuple`
73-
``(requests, seconds)``. If there is no such parameter or it doesn't
74-
apply to the *useragent* specified or the ``robots.txt`` entry for this
75-
parameter has invalid syntax, return ``None``.
72+
``robots.txt`` as a :term:`named tuple` ``RequestRate(requests, seconds)``.
73+
If there is no such parameter or it doesn't apply to the *useragent*
74+
specified or the ``robots.txt`` entry for this parameter has invalid
75+
syntax, return ``None``.
7676

7777
.. versionadded:: 3.6
7878

Lib/test/test_robotparser.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import threading
44
import unittest
55
import urllib.robotparser
6-
from collections import namedtuple
76
from test import support
87
from http.server import BaseHTTPRequestHandler, HTTPServer
98

@@ -87,6 +86,10 @@ def test_request_rate(self):
8786
self.parser.crawl_delay(agent), self.crawl_delay
8887
)
8988
if self.request_rate:
89+
self.assertIsInstance(
90+
self.parser.request_rate(agent),
91+
urllib.robotparser.RequestRate
92+
)
9093
self.assertEqual(
9194
self.parser.request_rate(agent).requests,
9295
self.request_rate.requests
@@ -108,7 +111,7 @@ class CrawlDelayAndRequestRateTest(BaseRequestRateTest, unittest.TestCase):
108111
Disallow: /%7ejoe/index.html
109112
"""
110113
agent = 'figtree'
111-
request_rate = namedtuple('req_rate', 'requests seconds')(9, 30)
114+
request_rate = urllib.robotparser.RequestRate(9, 30)
112115
crawl_delay = 3
113116
good = [('figtree', '/foo.html')]
114117
bad = ['/tmp', '/tmp.html', '/tmp/a.html', '/a%3cd.html', '/a%3Cd.html',
@@ -237,7 +240,7 @@ class DefaultEntryTest(BaseRequestRateTest, unittest.TestCase):
237240
Request-rate: 3/15
238241
Disallow: /cyberworld/map/
239242
"""
240-
request_rate = namedtuple('req_rate', 'requests seconds')(3, 15)
243+
request_rate = urllib.robotparser.RequestRate(3, 15)
241244
crawl_delay = 1
242245
good = ['/', '/test.html']
243246
bad = ['/cyberworld/map/index.html']

Lib/urllib/robotparser.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616

1717
__all__ = ["RobotFileParser"]
1818

19+
RequestRate = collections.namedtuple("RequestRate", "requests seconds")
20+
21+
1922
class RobotFileParser:
2023
""" This class provides a set of methods to read, parse and answer
2124
questions about a single robots.txt file.
@@ -136,11 +139,7 @@ def parse(self, lines):
136139
# check if all values are sane
137140
if (len(numbers) == 2 and numbers[0].strip().isdigit()
138141
and numbers[1].strip().isdigit()):
139-
req_rate = collections.namedtuple('req_rate',
140-
'requests seconds')
141-
entry.req_rate = req_rate
142-
entry.req_rate.requests = int(numbers[0])
143-
entry.req_rate.seconds = int(numbers[1])
142+
entry.req_rate = RequestRate(int(numbers[0]), int(numbers[1]))
144143
state = 2
145144
if state == 2:
146145
self._add_entry(entry)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix wrong usage of :func:`collections.namedtuple` in
2+
the :meth:`RobotFileParser.parse() <urllib.robotparser.RobotFileParser.parse>`
3+
method.
4+
5+
Initial patch by Robin Wellner.

0 commit comments

Comments
 (0)
0