Add _pyrepl.utils.unbracket

python · ambv · Mar 21, 2025 · Mar 20, 2025 · Mar 20, 2025 · Mar 19, 2025
commit b83d282616ad8549328d8b043979236ec25edf14
@@ -30,7 +30,7 @@
 
 
 from . import commands, console, input
-from .utils import ANSI_ESCAPE_SEQUENCE, wlen, str_width
+from .utils import wlen, unbracket, str_width
 from .trace import trace
 
 
@@ -421,42 +421,15 @@ def calc_screen(self) -> list[str]:
 
     @staticmethod
     def process_prompt(prompt: str) -> tuple[str, int]:
-        """Process the prompt.
+        r"""Return a tuple with the prompt string and its visible length.
 
-        This means calculate the length of the prompt. The character \x01
-        and \x02 are used to bracket ANSI control sequences and need to be
-        excluded from the length calculation.  So also a copy of the prompt
-        is returned with these control characters removed."""
-
-        # The logic below also ignores the length of common escape
-        # sequences if they were not explicitly within \x01...\x02.
-        # They are CSI (or ANSI) sequences  ( ESC [ ... LETTER )
-
-        # wlen from utils already excludes ANSI_ESCAPE_SEQUENCE chars,
-        # which breaks the logic below so we redefine it here.
-        def wlen(s: str) -> int:
-            return sum(str_width(i) for i in s)
-
-        out_prompt = ""
-        l = wlen(prompt)
-        pos = 0
-        while True:
-            s = prompt.find("\x01", pos)
-            if s == -1:
-                break
-            e = prompt.find("\x02", s)
-            if e == -1:
-                break
-            # Found start and end brackets, subtract from string length
-            l = l - (e - s + 1)
-            keep = prompt[pos:s]
-            l -= sum(map(wlen, ANSI_ESCAPE_SEQUENCE.findall(keep)))
-            out_prompt += keep + prompt[s + 1 : e]
-            pos = e + 1
-        keep = prompt[pos:]
-        l -= sum(map(wlen, ANSI_ESCAPE_SEQUENCE.findall(keep)))
-        out_prompt += keep
-        return out_prompt, l
+        The prompt string has the zero-width brackets recognized by shells
+        (\x01 and \x02) removed.  The length ignores anything between those
+        brackets as well as any ANSI escape sequences.
+        """
+        out_prompt = unbracket(prompt, including_content=False)
+        visible_prompt = unbracket(prompt, including_content=True)
+        return out_prompt, wlen(visible_prompt)
 
     def bow(self, p: int | None = None) -> int:
         """Return the 0-based index of the word break preceding p most

@@ -3,23 +3,36 @@
 import functools
 
 ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
+ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
+ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
 
 
 @functools.cache
 def str_width(c: str) -> int:
     if ord(c) < 128:
         return 1
     w = unicodedata.east_asian_width(c)
-    if w in ('N', 'Na', 'H', 'A'):
+    if w in ("N", "Na", "H", "A"):
         return 1
     return 2
 
 
 def wlen(s: str) -> int:
-    if len(s) == 1 and s != '\x1a':
+    if len(s) == 1 and s != "\x1a":
         return str_width(s)
     length = sum(str_width(i) for i in s)
     # remove lengths of any escape sequences
     sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
-    ctrl_z_cnt = s.count('\x1a')
+    ctrl_z_cnt = s.count("\x1a")
     return length - sum(len(i) for i in sequence) + ctrl_z_cnt
+
+
+def unbracket(s: str, including_content: bool = False) -> str:
+    r"""Return `s` with \001 and \002 characters removed.
+
+    If `including_content` is True, content between \001 and \002 is also
+    stripped.
+    """
+    if including_content:
+        return ZERO_WIDTH_BRACKET.sub("", s)
+    return s.translate(ZERO_WIDTH_TRANS)