ENH: add LaTeX math mode with parentheses by natmokval · Pull Request #51903 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
ENH: add math mode with parentheses III
  • Loading branch information
natmokval committed Mar 19, 2023
commit 29e357c8eb420006315255d4126731a12616b112
119 changes: 78 additions & 41 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -988,8 +988,9 @@ def format(
``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
LaTeX-safe sequences.
Use 'latex-math' to replace the characters the same way as in 'latex' mode,
except for math substrings, which start and end with ``$``.
Escaping is done before ``formatter``.
except for math substrings, which are either surrounded
by two ``$`` characters or start with the characters ``\(`` and
end with ``\)``. Escaping is done before ``formatter``.

.. versionadded:: 1.3.0

Expand Down Expand Up @@ -2359,8 +2360,7 @@ def _escape_latex(s):
Escaped string
"""
return (
s.replace("\\ ", "ab2§=§8yz")
.replace("\\", "ab2§=§8yz") # rare string for final conversion: avoid \\ clash
s.replace("\\", "ab2§=§8yz") # rare string for final conversion: avoid \\ clash
.replace("ab2§=§8yz ", "ab2§=§8yz\\space ") # since \backslash gobbles spaces
.replace("&", "\\&")
.replace("%", "\\%")
Expand All @@ -2377,6 +2377,75 @@ def _escape_latex(s):
)


def _math_mode_with_dollar(s):
    r"""
    Escape a string for LaTeX while keeping ``$...$`` math substrings intact.

    Every substring that starts with the character ``$`` and runs up to the
    next ``$`` is copied to the output unchanged. All text outside of those
    substrings is passed through ``_escape_latex``. The two-character
    sequence ``\$`` is swapped for a placeholder before the search, so it is
    never taken for a math delimiter, and is restored in the result.

    Parameters
    ----------
    s : str
        Input to be escaped

    Returns
    -------
    str :
        Escaped string
    """
    placeholder = r"rt8§=§7wz"
    shielded = s.replace(r"\$", placeholder)

    pieces = []
    cursor = 0
    # Walk the non-overlapping ``$...$`` matches from left to right: escape
    # the plain text in front of each match, then copy the match verbatim.
    for match in re.finditer(r"\$.*?\$", shielded):
        start, end = match.span()
        pieces.append(_escape_latex(shielded[cursor:start]))
        pieces.append(match.group())
        cursor = end

    # Whatever follows the last math substring is ordinary text.
    pieces.append(_escape_latex(shielded[cursor:]))
    return "".join(pieces).replace(placeholder, r"\$")


def _math_mode_with_parentheses(s):
    r"""
    Escape a string for LaTeX while keeping ``\(...\)`` math substrings intact.

    The input is tokenised on the literal delimiters ``\(`` and ``\)``.
    Text that sits directly between an opening ``\(`` and a closing ``\)``
    is copied to the output unchanged, together with its delimiters. All
    other text is passed through ``_escape_latex``, and a delimiter that
    does not enclose such a substring is rendered as ``\textbackslash (``
    or ``\textbackslash )``.

    Parameters
    ----------
    s : str
        Input to be escaped

    Returns
    -------
    str :
        Escaped string
    """
    opening, closing = r"\(", r"\)"

    # The capturing group makes ``re.split`` keep the delimiters: even
    # indices hold text, odd indices hold the delimiter that separated it.
    # Splitting on the real delimiters, instead of first substituting marker
    # words for them, means cell text that happens to contain such a word is
    # never mistaken for a delimiter.
    tokens = re.split(r"(\\\(|\\\))", s)
    last = len(tokens) - 1

    res = []
    for i in range(0, len(tokens), 2):
        text = tokens[i]
        opened = i > 0 and tokens[i - 1] == opening
        closed = i < last and tokens[i + 1] == closing

        if opened and closed:
            # Math mode: keep delimiters and content exactly as given.
            res.append(opening + text + closing)
            continue

        # An opening delimiter is emitted with the text that follows it and
        # a closing delimiter with the text that precedes it, so every
        # delimiter ends up in the output exactly once.
        prefix = r"\textbackslash (" if opened else ""
        suffix = r"\textbackslash )" if closed else ""
        res.append(prefix + _escape_latex(text) + suffix)
    return "".join(res)


def _escape_latex_math(s):
r"""
All characters in LaTeX math mode are preserved.
Expand All @@ -2395,43 +2464,9 @@ def _escape_latex_math(s):
str :
Escaped string
"""

def _math_mode_with_dollar(s):
s = s.replace(r"\$", r"rt8§=§7wz")
pattern = re.compile(r"\$.*?\$")
pos = 0
ps = pattern.search(s, pos)
res = []
while ps:
res.append(_escape_latex(s[pos : ps.span()[0]]))
res.append(ps.group())
pos = ps.span()[1]
ps = pattern.search(s, pos)

res.append(_escape_latex(s[pos : len(s)]))
return "".join(res).replace(r"rt8§=§7wz", r"\$")

def _math_mode_with_parentheses(s):
s = s.replace(r"\(", r"LEFT§=§6yzLEFT").replace(r"\)", r"RIGHTab5§=§RIGHT")
res = []
for item in re.split(r"LEFT§=§6yz|ab5§=§RIGHT", s):
if item.startswith("LEFT") and item.endswith("RIGHT"):
res.append(item.replace("LEFT", r"\(").replace("RIGHT", r"\)"))
elif "LEFT" in item and "RIGHT" in item:
res.append(
_escape_latex(item).replace("LEFT", r"\(").replace("RIGHT", r"\)")
)
else:
res.append(
_escape_latex(item)
.replace("LEFT", r"\textbackslash (")
.replace("RIGHT", r"\textbackslash )")
)
return "".join(res)

s = s.replace(r"\$", r"rt8§=§7wz")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand this code.
You are replacing the string \$ with a uuid string, first.
Then, you are searching for a pattern (r"\$.*?\$") that cannot exist, since it was replaced.

Then in line 2490 you are replacing the same string s again with the same uuid, but this is unnecessary since it has already done this in line 2481.
I think your tests pass and this does the correct thing but I think some of these lines are redundant and do nothing for the overall effect??

Copy link
Contributor Author
@natmokval natmokval Mar 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the comment.
It’s right, the replacement in line 2490 has a mistake. The right one would be the reverse replacement: s.replace(r"rt8§=§7wz", r"\$")

I can explain what I am trying to do. When I checked the function _escape_latex_math I noticed that for a string like r"$&%#^$" which contains only one sign "$" and only one combination "\$" I got a wrong result, because the function processed this string in math mode. By doing the replacement in line 2481 I exclude from the consideration the string “\$” to avoid confusing it with “$”. Then I get the correct result and do the reverse replacement. If we don’t have a combination of one sign "$" and one sign "\$" we don’t need to do this check, but I prefer to leave it.

I corrected my mistake and made a new commit. I also added an example for this case in the test.

pattern_d = re.compile(r"\$.*?\$")
pattern_p = re.compile(r"\\(.*?\\)")
pattern_p = re.compile(r"\(.*?\)")
pos_d = 0
pos_p = 0
ps_d = pattern_d.search(s, pos_d)
Expand All @@ -2443,7 +2478,9 @@ def _math_mode_with_parentheses(s):
mode.append(ps_p.span()[0])
if len(mode) == 0:
return _escape_latex(s.replace(r"\$", r"rt8§=§7wz"))
if s[min(mode)] == r"$":
if s[mode[0]] == r"$":
return _math_mode_with_dollar(s.replace(r"\$", r"rt8§=§7wz"))
else:
if s[mode[0] - 1 : mode[0] + 1] == r"\(":
return _math_mode_with_parentheses(s.replace(r"\$", r"rt8§=§7wz"))
else:
return _escape_latex(s.replace(r"\$", r"rt8§=§7wz"))
7 changes: 4 additions & 3 deletions pandas/tests/io/formats/style/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def test_format_clear(styler, func, attr, kwargs):
"latex",
'<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum '
"\\textbackslash \\textasciitilde \\space \\textasciicircum \\space "
"\\textbackslash ",
"\\textbackslash \\space ",
),
],
)
Expand Down Expand Up @@ -200,7 +200,8 @@ def test_format_escape_html(escape, exp):
"".join(
[
r"$ \$&%#_{}~^\ $ ",
r"\&\%\#\_\{\}\textasciitilde \textasciicircum \textbackslash \$",
r"\&\%\#\_\{\}\textasciitilde \textasciicircum ",
r"\textbackslash \space \$",
]
),
),
Expand All @@ -210,7 +211,7 @@ def test_format_escape_html(escape, exp):
[
r"\( &%#_{}~^\ \) ",
r"\&\%\#\_\{\}\textasciitilde \textasciicircum ",
r"\textbackslash \textbackslash (",
r"\textbackslash \space \textbackslash (",
]
),
),
Expand Down
0