para: Paragraph.text includes hyperlink text · python-openxml/python-docx@9abd14a · GitHub
[go: up one dir, main page]

Skip to content

Commit 9abd14a

Browse files
committed
para: Paragraph.text includes hyperlink text
1 parent a02c220 commit 9abd14a

File tree

5 files changed

+32
-5
lines changed

5 files changed

+32
-5
lines changed

features/par-access-inner-content.feature

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,8 @@ Feature: Access paragraph inner-content including hyperlinks
4242
| no | 0 |
4343
| one | 1 |
4444
| two | 2 |
45+
46+
47+
Scenario: Paragraph.text contains both run-text and hyperlink-text
48+
Given a paragraph having three hyperlinks
49+
Then paragraph.text contains the text of both the runs and the hyperlinks

features/steps/paragraph.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,15 @@ def then_paragraph_style_is_value(context: Context, value_key: str):
190190
assert paragraph.style == expected_value
191191

192192

193+
@then("paragraph.text contains the text of both the runs and the hyperlinks")
194+
def then_paragraph_text_contains_the_text_of_both_the_runs_and_the_hyperlinks(
195+
context: Context,
196+
):
197+
actual = context.paragraph.text
198+
expected = "Three hyperlinks: the first one here, the second one, and the third."
199+
assert actual == expected, f"expected:\n'{expected}'\n\ngot:\n'{actual}'"
200+
201+
193202
@then("the document contains four paragraphs")
194203
def then_the_document_contains_four_paragraphs(context: Context):
195204
assert len(context.document.paragraphs) == 4

src/docx/oxml/text/paragraph.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,15 @@ def style(self, style):
8888
pPr = self.get_or_add_pPr()
8989
pPr.style = style
9090

91+
@property # pyright: ignore[reportIncompatibleVariableOverride]
92+
def text(self):
93+
"""The textual content of this paragraph.
94+
95+
Inner-content child elements like `w:r` and `w:hyperlink` are translated to
96+
their text equivalent.
97+
"""
98+
return "".join(e.text for e in self.xpath("w:r | w:hyperlink"))
99+
91100
def _insert_pPr(self, pPr: CT_PPr) -> CT_PPr:
92101
self.insert(0, pPr)
93102
return pPr

src/docx/text/paragraph.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,9 @@ def style(self, style_or_name: str | ParagraphStyle | None):
150150

151151
@property
152152
def text(self) -> str:
153-
"""String formed by concatenating the text of each run in the paragraph.
153+
"""The textual content of this paragraph.
154154
155+
The text includes the visible-text portion of any hyperlinks in the paragraph.
155156
Tabs and line breaks in the XML are mapped to ``\\t`` and ``\\n`` characters
156157
respectively.
157158
@@ -161,10 +162,7 @@ def text(self) -> str:
161162
character is mapped to a line break. Paragraph-level formatting, such as style,
162163
is preserved. All run-level formatting, such as bold or italic, is removed.
163164
"""
164-
text = ""
165-
for run in self.runs:
166-
text += run.text
167-
return text
165+
return self._p.text
168166

169167
@text.setter
170168
def text(self, text: str | None):

tests/text/test_paragraph.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,15 @@ def it_provides_access_to_the_rendered_page_breaks_it_contains(
143143
('w:p/w:r/(w:t"foo", w:tab, w:t"bar")', "foo\tbar"),
144144
('w:p/w:r/(w:t"foo", w:br, w:t"bar")', "foo\nbar"),
145145
('w:p/w:r/(w:t"foo", w:cr, w:t"bar")', "foo\nbar"),
146+
(
147+
'w:p/(w:r/w:t"click ",w:hyperlink{r:id=rId6}/w:r/w:t"here",'
148+
'w:r/w:t" for more")',
149+
"click here for more",
150+
),
146151
],
147152
)
148153
def it_knows_the_text_it_contains(self, p_cxml: str, expected_value: str):
154+
"""Including the text of embedded hyperlinks."""
149155
paragraph = Paragraph(element(p_cxml), None)
150156
assert paragraph.text == expected_value
151157

0 commit comments

Comments
 (0)
0