ENH: add LaTeX math mode with parentheses by natmokval · Pull Request #51903 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
ENH: add math mode with parentheses II
  • Loading branch information
natmokval committed Mar 15, 2023
commit 2320fb3f3690d00601d936c68aa14ef16b1e7582
40 changes: 28 additions & 12 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -1118,7 +1118,7 @@ def format(
\end{tabular}

Applying ``escape`` in 'latex-math' mode. In the example below
we enter math mode using the charackter ``$``.
we enter math mode using the character ``$``.

>>> df = pd.DataFrame([[r"$\sum_{i=1}^{10} a_i$ a~b $\alpha \
... = \frac{\beta}{\zeta^2}$"], ["%#^ $ \$x^2 $"]])
Expand All @@ -1130,7 +1130,7 @@ def format(
1 & \%\#\textasciicircum \space $ \$x^2 $ \\
\end{tabular}

We can use the charackter ``\(`` to enter math mode and the charackter ``\)``
We can use the character ``\(`` to enter math mode and the character ``\)``
to close math mode.

>>> df = pd.DataFrame([[r"\(\sum_{i=1}^{10} a_i\) a~b \(\alpha \
Expand Down Expand Up @@ -2359,7 +2359,8 @@ def _escape_latex(s):
Escaped string
"""
return (
s.replace("\\", "ab2§=§8yz") # rare string for final conversion: avoid \\ clash
s.replace("\\ ", "ab2§=§8yz")
.replace("\\", "ab2§=§8yz") # rare string for final conversion: avoid \\ clash
.replace("ab2§=§8yz ", "ab2§=§8yz\\space ") # since \backslash gobbles spaces
.replace("&", "\\&")
.replace("%", "\\%")
Expand All @@ -2372,8 +2373,6 @@ def _escape_latex(s):
.replace("~", "\\textasciitilde ")
.replace("^ ", "^\\space ") # since \textasciicircum gobbles spaces
.replace("^", "\\textasciicircum ")
.replace("ab2§=§8yz(", "\\( ")
.replace("ab2§=§8yz)", "\\) ")
.replace("ab2§=§8yz", "\\textbackslash ")
)

Expand All @@ -2385,7 +2384,6 @@ def _escape_latex_math(s):
The substrings in LaTeX math mode, which either are surrounded
by two characters ``$`` or start with the character ``\(`` and end with ``\)``,
are preserved without escaping. Otherwise regular LaTeX escaping applies.
See ``_escape_latex()``.

Parameters
----------
Expand Down Expand Up @@ -2419,15 +2417,33 @@ def _math_mode_with_parentheses(s):
for item in re.split(r"LEFT§=§6yz|ab5§=§RIGHT", s):
if item.startswith("LEFT") and item.endswith("RIGHT"):
res.append(item.replace("LEFT", r"\(").replace("RIGHT", r"\)"))
else:
elif "LEFT" in item and "RIGHT" in item:
res.append(
_escape_latex(item).replace("LEFT", r"\(").replace("RIGHT", r"\)")
)
else:
res.append(
_escape_latex(item)
.replace("LEFT", r"\textbackslash (")
.replace("RIGHT", r"\textbackslash )")
)
return "".join(res)

if s.replace(r"\$", "ab").find(r"$") > -1:
return _math_mode_with_dollar(s)
elif s.find(r"\(") > -1:
return _math_mode_with_parentheses(s)
s = s.replace(r"\$", r"rt8§=§7wz")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand this code.
You are replacing the string \$ with a uuid string, first.
Then, you are searching for a pattern (r"\$.*?\$") that cannot exist, since it was replaced.

Then in line 2490 you are replacing the same string s again with the same uuid, but this is unnecessary since that replacement has already been done in line 2481.
I think your tests pass and this does the correct thing, but I think some of these lines are redundant and contribute nothing to the overall effect?

Copy link
Contributor Author
@natmokval natmokval Mar 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the comment.
You’re right, the replacement in line 2490 is a mistake. The correct one would be the reverse replacement: s.replace(r"rt8§=§7wz", r"\$")

I can explain what I am trying to do. When I checked the function _escape_latex_math, I noticed that for a string like r"\$&%#^$", which contains only one sign "$" and only one combination "\$", I got a wrong result because the function processed this string in math mode. By doing the replacement in line 2481, I exclude the combination "\$" from consideration to avoid confusing it with "$". Then I get the correct result and do the reverse replacement. If we don’t have a combination of one sign "$" and one sign "\$", we don’t need to do this check, but I prefer to leave it.

I corrected my mistake and made a new commit. I also added an example for this case to the tests.

pattern_d = re.compile(r"\$.*?\$")
pattern_p = re.compile(r"\\(.*?\\)")
pos_d = 0
pos_p = 0
ps_d = pattern_d.search(s, pos_d)
ps_p = pattern_p.search(s, pos_p)
mode = []
if ps_d:
mode.append(ps_d.span()[0])
if ps_p:
mode.append(ps_p.span()[0])
if len(mode) == 0:
return _escape_latex(s.replace(r"\$", r"rt8§=§7wz"))
if s[min(mode)] == r"$":
return _math_mode_with_dollar(s.replace(r"\$", r"rt8§=§7wz"))
else:
return _escape_latex(s)
return _math_mode_with_parentheses(s.replace(r"\$", r"rt8§=§7wz"))
45 changes: 35 additions & 10 deletions pandas/tests/io/formats/style/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def test_format_clear(styler, func, attr, kwargs):
"latex",
'<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum '
"\\textbackslash \\textasciitilde \\space \\textasciicircum \\space "
"\\textbackslash \\space ",
"\\textbackslash ",
),
],
)
Expand Down Expand Up @@ -195,19 +195,44 @@ def test_format_escape_html(escape, exp):
@pytest.mark.parametrize(
"chars, expected",
[
(r"$\frac{1}{2} \$ x^2$ ", r"$\frac{1}{2} \$ x^2$ "),
(r"\(\frac{1}{2} \$ x^2\) ", r"\(\frac{1}{2} \$ x^2\) "),
(r"\)", r"\) "),
(
r"$ \$&%#_{}~^\ $ &%#_{}~^\ $",
"".join(
[
r"$ \$&%#_{}~^\ $ ",
r"\&\%\#\_\{\}\textasciitilde \textasciicircum \textbackslash \$",
]
),
),
(
r"\( &%#_{}~^\ \) &%#_{}~^\ \(",
"".join(
[
r"\( &%#_{}~^\ \) ",
r"\&\%\#\_\{\}\textasciitilde \textasciicircum ",
r"\textbackslash \textbackslash (",
]
),
),
(
r"$ \frac{1}{2} $ \( \frac{1}{2} \)",
"".join(
[
r"$ \frac{1}{2} $",
r" \textbackslash ( \textbackslash frac\{1\}\{2\} \textbackslash )",
]
),
),
],
)
def test_format_escape_latex_math(chars, expected):
df = DataFrame([["".join([chars, "~%#^"])]])

# GH 51903
# latex-math escape works for each DataFrame cell separately.
# If we have a combination of dollar signs and brackets,
# the sign which occurs first would apply.
df = DataFrame([[chars]])
s = df.style.format("{0}", escape="latex-math")
assert (
"".join([expected, r"\textasciitilde \%\#\textasciicircum "])
== s._translate(True, True)["body"][0][1]["display_value"]
)
assert s._translate(True, True)["body"][0][1]["display_value"] == expected


def test_format_escape_na_rep():
Expand Down
0