Changes to validate_docstring script to be able to check all docstrings at once by datapythonista · Pull Request #22408 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

Changes to validate_docstring script to be able to check all docstrings at once #22408

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Oct 13, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Adding tests to the function that parses api.rst
  • Loading branch information
datapythonista committed Sep 29, 2018
commit 1d2c5c0422e1e98d1635a3717a101a313e89b868
77 changes: 77 additions & 0 deletions scripts/tests/test_validate_docstrings.py
8000
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import string
import random
import io
import pytest
import numpy as np

Expand Down Expand Up @@ -605,3 +606,79 @@ def test_bad_examples(self, capsys, klass, func, msgs):
result = validate_one(self._import_path(klass=klass, func=func)) # noqa:F821
for msg in msgs:
assert msg in ' '.join(result['errors'])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor nit but what does this add? Can't we just do the normal inclusion check against the list instead of joining into a string?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

msg in result['errors'] is different from this; for example, 'a' in 'foo bar' is True, but 'a' in ['foo', 'bar'] is False.

I think the .join() is simpler than another loop. Is that what you would do, or were you thinking of the previous case?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, but I thought msg would explicitly match one of the errors; I will take a deeper look on the next review to see whether that assumption is incorrect.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having debugged this locally, I still don't think the string concatenation is necessary.

image



class TestApiItems(object):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also minor but since this test isn't used for any testing (i.e. not collected by the runner) can we remove the Test prefix?

# In-memory stand-in for the API page that the tests pass to
# validate_docstrings.get_api_items: two sections (Itertools, Random),
# three subsections (Infinite, Finite, All) and five autosummary items.
# Exposed as a property so every access returns a new, unread StringIO.
@property
def api_doc(self):
return io.StringIO('''
.. currentmodule:: itertools

Itertools
---------

Infinite
~~~~~~~~

.. autosummary::

cycle
count

Finite
~~~~~~

.. autosummary::

chain

.. currentmodule:: random

Random
------

All
~~~

.. autosummary::

seed
randint
''')

@pytest.mark.parametrize('idx,name', [(0, 'itertools.cycle'),
(1, 'itertools.count'),
(2, 'itertools.chain'),
(3, 'random.seed'),
(4, 'random.randint')])
def test_item_name(self, idx, name):
res = list(validate_docstrings.get_api_items(self.api_doc))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you use result instead of res?

assert res[idx][0] == name

@pytest.mark.parametrize('idx,func', [
    (0, 'cycle'),
    (1, 'count'),
    (2, 'chain'),
    (3, 'seed'),
    (4, 'randint'),
])
def test_item_function(self, idx, func):
    """Second field of each parsed item is the callable named ``func``."""
    items = list(validate_docstrings.get_api_items(self.api_doc))
    obj = items[idx][1]
    assert callable(obj)
    assert obj.__name__ == func

@pytest.mark.parametrize('idx,section', [
    (0, 'Itertools'),
    (1, 'Itertools'),
    (2, 'Itertools'),
    (3, 'Random'),
    (4, 'Random'),
])
def test_item_section(self, idx, section):
    """Third field of each parsed item is the enclosing section title."""
    items = list(validate_docstrings.get_api_items(self.api_doc))
    found_section = items[idx][2]
    assert found_section == section

@pytest.mark.parametrize('idx,subsection', [
    (0, 'Infinite'),
    (1, 'Infinite'),
    (2, 'Finite'),
    (3, 'All'),
    (4, 'All'),
])
def test_item_subsection(self, idx, subsection):
    """Fourth field of each parsed item is the enclosing subsection title."""
    items = list(validate_docstrings.get_api_items(self.api_doc))
    found_subsection = items[idx][3]
    assert found_subsection == subsection
75 changes: 40 additions & 35 deletions scripts/validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,17 @@
DIRECTIVES = ['versionadded', 'versionchanged', 'deprecated']


def get_api_items():
def get_api_items(api_doc_fd):
"""
Parse api.rst file from the documentation, and extract all the functions,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we make the first sentence one line?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you change this? Maybe missed on prior review

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You've got the one liner summary above.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My mistake - thanks for clarifying

methods, classes, attributes... This should include all pandas public API.

Parameters
----------
api_doc_fd : file descriptor
A file descriptor of the API documentation page, containing the table
of contents with all the public API.

Yields
------
name : str
Expand All @@ -64,47 +70,44 @@ def get_api_items():
The name of the subsection in the API page where the object item is
located.
"""
api_fname = os.path.join(BASE_PATH, 'doc', 'source', 'api.rst')

previous_line = current_section = current_subsection = ''
position = None
with open(api_fname) as f:
for line in f:
line = line.strip()
if len(line) == len(previous_line):
if set(line) == set('-'):
current_section = previous_line
continue
if set(line) == set('~'):
current_subsection = previous_line
continue

if line.startswith('.. currentmodule::'):
current_module = line.replace('.. currentmodule::', '').strip()
for line in api_doc_fd:
line = line.strip()
if len(line) == len(previous_line):
if set(line) == set('-'):
current_section = previous_line
continue

if line == '.. autosummary::':
position = 'autosummary'
if set(line) == set('~'):
current_subsection = previous_line
continue

if position == 'autosummary':
if line == '':
position = 'items'
continue
if line.startswith('.. currentmodule::'):
current_module = line.replace('.. currentmodule::', '').strip()
continue

if position == 'items':
if line == '':
position = None
continue
item = line.strip()
func = importlib.import_module(current_module)
for part in item.split('.'):
func = getattr(func, part)
if line == '.. autosummary::':
position = 'autosummary'
continue

if position == 'autosummary':
if line == '':
position = 'items'
continue

if position == 'items':
if line == '':
position = None
continue
item = line.strip()
func = importlib.import_module(current_module)
for part in item.split('.'):
func = getattr(func, part)

yield ('.'.join([current_module, item]), func,
current_section, current_subsection)
yield ('.'.join([current_module, item]), func,
current_section, current_subsection)

previous_line = line
previous_line = line


class Docstring(object):
Expand Down Expand Up @@ -534,7 +537,9 @@ def validate_all():
seen = {}

# functions from the API docs
api_items = list(get_api_items())
api_doc_fname = os.path.join(BASE_PATH, 'doc', 'source', 'api.rst')
with open(api_doc_fname) as f:
api_items = list(get_api_items(f))
for func_name, func_obj, section, subsection in api_items:
doc_info = validate_one(func_name)
result[func_name] = doc_info
Expand Down
0