read_html: Handle colspan and rowspan by adamhooper · Pull Request #21487 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

read_html: Handle colspan and rowspan #21487

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jul 5, 2018
Merged
Prev Previous commit
Next Next commit
Docstring tweaks
  • Loading branch information
adamhooper committed Jun 27, 2018
commit 34f87cb9338385f91d3df204b666e7dd5b8cfb65
34 changes: 20 additions & 14 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def parse_tables(self):

Returns
-------
tables : list of parsed (header, body, footer) tuples from tables
list of parsed (header, body, footer) tuples from tables.
"""
tables = self._parse_tables(self._build_doc(), self.match, self.attrs)
return (self._parse_thead_tbody_tfoot(table) for table in tables)
Expand All @@ -233,7 +233,7 @@ def _attr_getter(self, obj, attr):

Returns
-------
text : str or unicode
str or unicode
The attribute value.
"""
raise com.AbstractMethodError(self)
Expand Down Expand Up @@ -264,7 +264,7 @@ def _parse_td(self, obj):

Returns
-------
columns : list of node-like
list of node-like
These are the elements of each row, i.e., the columns.
"""
raise com.AbstractMethodError(self)
Expand All @@ -279,7 +279,8 @@ def _parse_thead_tr(self, table):

Returns
-------
rows : list of <tr> row elements of a table
list of node-like
These are the <tr> row elements of a table.
"""
raise com.AbstractMethodError(self)

Expand All @@ -297,7 +298,8 @@ def _parse_tbody_tr(self, table):

Returns
-------
rows : list of <tr> row elements of a table
list of node-like
These are the <tr> row elements of a table.
"""
raise com.AbstractMethodError(self)

Expand All @@ -311,7 +313,8 @@ def _parse_tfoot_tr(self, table):

Returns
-------
rows : list of <tr> row elements of a table
list of node-like
These are the <tr> row elements of a table.
"""
raise com.AbstractMethodError(self)

Expand All @@ -336,7 +339,8 @@ def _parse_tables(self, doc, match, attrs):

Returns
-------
tables : list of HTML <table> elements to be parsed into raw data.
list of node-like
HTML <table> elements to be parsed into raw data.
"""
raise com.AbstractMethodError(self)

Expand All @@ -354,8 +358,8 @@ def _equals_tag(self, obj, tag):

Returns
-------
is_tag_equal : boolean
boolean indicating if the object is equal to tag 'tag'
boolean
Whether the object is equal to tag 'tag'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Parameter references are typically written in backticks in docstrings, so you can change "tag 'tag'" to simply "`tag`".

"""
raise com.AbstractMethodError(self)

Expand All @@ -365,7 +369,8 @@ def _build_doc(self):

Returns
-------
obj : the DOM from which to parse the table element.
node-like
The DOM from which to parse the table element.
"""
raise com.AbstractMethodError(self)

Expand All @@ -390,7 +395,7 @@ def _parse_thead_tbody_tfoot(self, table_html):

Returns
-------
tuple of (header, body, footer)
tuple of (header, body, footer), each a list of list-of-text rows.
"""

header_rows = self._parse_thead_tr(table_html)
Expand Down Expand Up @@ -432,7 +437,8 @@ def _expand_colspan_rowspan(self, rows):

Returns
-------
res : list of rows, each of which is a list of str in that row
list of list
Each returned row is a list of str text.
"""

all_texts = [] # list of rows, each a list of str
Expand Down Expand Up @@ -501,14 +507,14 @@ def _handle_hidden_tables(self, tbl_list, attr_name):

Parameters
----------
tbl_list : list of Tag or list of Element
tbl_list : list of node-like
Type of list elements will vary depending upon parser used
attr_name : str
Name of the accessor for retrieving HTML attributes

Returns
-------
list of Tag or list of Element
list of node-like
Return type matches `tbl_list`
"""
if not self.displayed_only:
Expand Down
0