Py27 compat and updated tests / logic

pandas-dev · datapythonista · Aug 17, 2018 · Mar 8, 2018 · Mar 8, 2018 · Mar 9, 2018
commit 876337f22ad073c301f7fd891f34ca55bf8b5d4f
diff --git a/pandas/tests/scripts/test_validate_docstrings.py b/pandas/tests/scripts/test_validate_docstrings.py
@@ -1,7 +1,7 @@
 import os
 import sys
 
-import numpy
+import numpy as np
 import pytest
 
 
@@ -79,7 +79,8 @@ def sample_values(self):
             yield random.random()
 
     def head(self):
-        """Return the first 5 elements of the Series.
+        """
+        Return the first 5 elements of the Series.
 
         This function is mainly useful to preview the values of the
         Series without displaying the whole of it.
@@ -98,7 +99,8 @@ def head(self):
         return self.iloc[:5]
 
     def head1(self, n=5):
-        """Return the first elements of the Series.
+        """
+        Return the first elements of the Series.
 
         This function is mainly useful to preview the values of the
         Series without displaying the whole of it.
@@ -108,9 +110,9 @@ def head1(self, n=5):
         n : int
             Number of values to return.
 
-        Return
-        ------
-        pandas.Series
+        Returns
+        -------
+        Series
             Subset of the original series with the n first values.
 
         See Also
@@ -119,8 +121,7 @@ def head1(self, n=5):
 
         Examples
         --------
-        >>> s = pd.Series(['Ant', 'Bear', 'Cow', 'Dog', 'Falcon',
-        ...                'Lion', 'Monkey', 'Rabbit', 'Zebra'])
+        >>> s = pd.Series(['Ant', 'Bear', 'Cow', 'Dog', 'Falcon'])
         >>> s.head()
         0   Ant
         1   Bear
@@ -139,40 +140,49 @@ def head1(self, n=5):
         """
         return self.iloc[:n]
 
-    def contains(self, pattern, case_sensitive=True, na=numpy.nan):
+    def contains(self, pat, case=True, na=np.nan):
         """
-        Return whether each value contains `pattern`.
+        Return whether each value contains `pat`.
 
         In this case, we are illustrating how to use sections, even
         if the example is simple enough and does not require them.
 
+        Parameters
+        ----------
+        pat : str
+            Pattern to check for within each element.
+        case : bool, default True
+            Whether check should be done with case sensitivity.
+        na : object, default np.nan
+            Fill value for missing data.
+
         Examples
         --------
-        >>> s = pd.Series('Antelope', 'Lion', 'Zebra', numpy.nan)
-        >>> s.contains(pattern='a')
+        >>> s = pd.Series(['Antelope', 'Lion', 'Zebra', np.nan])
+        >>> s.str.contains(pat='a')
         0    False
         1    False
         2     True
         3      NaN
-        dtype: bool
+        dtype: object
 
         **Case sensitivity**
 
         With `case_sensitive` set to `False` we can match `a` with both
         `a` and `A`:
 
-        >>> s.contains(pattern='a', case_sensitive=False)
+        >>> s.str.contains(pat='a', case=False)
         0     True
         1    False
         2     True
         3      NaN
-        dtype: bool
+        dtype: object
 
         **Missing values**
 
         We can fill missing values in the output using the `na` parameter:
 
-        >>> s.contains(pattern='a', na=False)
+        >>> s.str.contains(pat='a', na=False)
         0    False
         1    False
         2     True
@@ -181,20 +191,6 @@ def contains(self, pattern, case_sensitive=True, na=numpy.nan):
         """
         pass
 
-    def plot2(self):
-        """
-        Generate a plot with the `Series` data.
-
-        Examples
-        --------
-
-        .. plot::
-            :context: close-figs
-
-        >>> s = pd.Series([1, 2, 3])
-        >>> s.plot()
-        """
-        pass    
 
 class BadDocStrings(object):
 
@@ -285,7 +281,7 @@ def method(self, foo=None, bar=None):
         to understand.
 
         Try to avoid positional arguments like in `df.method(1)`. They
-        can be all right if previously defined with a meaningful name,
+        can be alright if previously defined with a meaningful name,
         like in `present_value(interest_rate)`, but avoid them otherwise.
 
         When presenting the behavior with different parameters, do not place
@@ -296,19 +292,30 @@ def method(self, foo=None, bar=None):
         --------
         >>> import numpy as np
         >>> import pandas as pd
-        >>> df = pd.DataFrame(numpy.random.randn(3, 3),
+        >>> df = pd.DataFrame(np.ones((3, 3)),
         ...                   columns=('a', 'b', 'c'))
-        >>> df.method(1)
-        21
-        >>> df.method(bar=14)
-        123
+        >>> df.all(1)
+        0    True
+        1    True
+        2    True
+        dtype: bool
+        >>> df.all(bool_only=True)
+        Series([], dtype: bool)
         """
         pass
 
 class TestValidator(object):
 
     @pytest.fixture(autouse=True, scope="class")
     def import_scripts(self):
+        """
+        Because the scripts directory is above the top level pandas package
+        we need to hack sys.path to know where to find that directory for
+        import. The below traverses up the file system to find the scripts
+        directory, adds to location to sys.path and imports the required
+        module into the global namespace before as part of class setup,
+        reverting those changes on teardown.
+        """
         up = os.path.dirname
         file_dir = up(os.path.abspath(__file__))
         script_dir = os.path.join(up(up(up(file_dir))), 'scripts')
@@ -321,7 +328,13 @@ def import_scripts(self):
 
     @pytest.mark.parametrize("func", [
         'plot', 'sample', 'random_letters', 'sample_values', 'head', 'head1',
-        'contains', 'plot2'])
+        'contains'])
     def test_good_functions(self, func):
         assert validate_one('pandas.tests.scripts.test_validate_docstrings'
                             '.GoodDocStrings.' + func) == 0
+
+    @pytest.mark.parametrize("func", [
+        'func', 'astype', 'astype1', 'astype2', 'astype3', 'plot', 'method'])
+    def test_bad_functions(self, func):
+        assert validate_one('pandas.tests.scripts.test_validate_docstrings'
+                            '.BadDocStrings.' + func) > 0
diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py
@@ -34,6 +34,7 @@
 
 sys.path.insert(0, os.path.join(BASE_PATH))
 import pandas
+from pandas.compat import signature
 
 sys.path.insert(1, os.path.join(BASE_PATH, 'doc', 'sphinxext'))
 from numpydoc.docscrape import NumpyDocString
@@ -185,11 +186,17 @@ def signature_parameters(self):
             # accessor classes have a signature, but don't want to show this
             return tuple()
         try:
-            params = self.method_obj.__code__.co_varnames
+            sig = signature(self.method_obj)
         except (TypeError, ValueError):
             # Some objects, mainly in C extensions do not support introspection
             # of the signature
             return tuple()
+        params = sig.args
+        if sig.varargs:
+            params.append("*" + sig.varargs)
+        if sig.keywords:
+            params.append("**" + sig.keywords)
+        params = tuple(params)
         if params and params[0] in ('self', 'cls'):
             return params[1:]
         return params
@@ -237,6 +244,10 @@ def examples(self):
     def returns(self):
         return self.doc['Returns']
 
+    @property
+    def method_source(self):
+        return inspect.getsource(self.method_obj)
+
     @property
     def first_line_ends_in_dot(self):
         if self.doc:
@@ -376,13 +387,27 @@ def validate_all():
 
 
 def validate_one(func_name):
+    """
+    Validate the docstring for the given func_name
+
+    Parameters
+    ----------
+    func_name : function
+        Function whose docstring will be evaluated
+
+    Returns
+    -------
+    int
+        The number of errors found in the `func_name` docstring
+    """
     func_obj = _load_obj(func_name)
     doc = Docstring(func_name, func_obj)
 
     sys.stderr.write(_output_header('Docstring ({})'.format(func_name)))
     sys.stderr.write('{}\n'.format(doc.clean_doc))
 
     errs = []
+    wrns = []
     if doc.start_blank_lines != 1:
         errs.append('Docstring text (summary) should start in the line '
                     'immediately after the opening quotes (not in the same '
@@ -410,16 +435,17 @@ def validate_one(func_name):
                         'not third person (e.g. use "Generate" instead of '
                         '"Generates")')
     if not doc.extended_summary:
-        errs.append('No extended summary found')
+        wrns.append('No extended summary found')
 
     param_errs = doc.parameter_mismatches
     for param in doc.doc_parameters:
-        if not doc.parameter_type(param):
-            param_errs.append('Parameter "{}" has no type'.format(param))
-        else:
-            if doc.parameter_type(param)[-1] == '.':
-                param_errs.append('Parameter "{}" type '
-                                  'should not finish with "."'.format(param))
+        if not param.startswith("*"):  # Check can ignore var / kwargs
+            if not doc.parameter_type(param):
+                param_errs.append('Parameter "{}" has no type'.format(param))
+            else:
+                if doc.parameter_type(param)[-1] == '.':
+                    param_errs.append('Parameter "{}" type '
+                                      'should not finish with "."'.format(param))
 
         if not doc.parameter_desc(param):
             param_errs.append('Parameter "{}" '
@@ -437,24 +463,28 @@ def validate_one(func_name):
         for param_err in param_errs:
             errs.append('\t{}'.format(param_err))
 
-    if not doc.returns:
-        errs.append('No returns section found')
+    if not doc.returns and "return" in doc.method_source:
+        errs.append('No Returns section found')
+    if "yield" in doc.method_source:
+        # numpydoc is not correctly parsing Yields sections, so
+        # best we can do is warn the user to lookout for this...
+        wrns.append('Yield found in source - please make sure to document!')
 
     mentioned_errs = doc.mentioned_private_classes
     if mentioned_errs:
         errs.append('Private classes ({}) should not be mentioned in public '
                     'docstring.'.format(mentioned_errs))
 
     if not doc.see_also:
-        errs.append('See Also section not found')
+        wrns.append('See Also section not found')
     else:
         for rel_name, rel_desc in doc.see_also.items():
             if not rel_desc:
                 errs.append('Missing description for '
                             'See Also "{}" reference'.format(rel_name))
     examples_errs = ''
     if not doc.examples:
-        errs.append('No examples section found')
+        wrns.append('No examples section found')
     else:
         examples_errs = doc.examples_errors
         if examples_errs:
@@ -465,7 +495,12 @@ def validate_one(func_name):
         sys.stderr.write('Errors found:\n')
         for err in errs:
             sys.stderr.write('\t{}\n'.format(err))
-    else:
+    if wrns:
+        sys.stderr.write('Warnings found:\n')
+        for wrn in wrns:
+            sys.stderr.write('\t{}\n'.format(wrn))
+
+    if not errs:
         sys.stderr.write('Docstring for "{}" correct. :)\n'.format(func_name))
 
     if examples_errs: