Skip to content

Commit b83d282

Browse files
committed
Add _pyrepl.utils.unbracket
1 parent 86d5fa9 commit b83d282

2 files changed

Lines changed: 25 additions & 39 deletions

File tree

Lib/_pyrepl/reader.py

Lines changed: 9 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131

3232
from . import commands, console, input
33-
from .utils import ANSI_ESCAPE_SEQUENCE, wlen, str_width
33+
from .utils import wlen, unbracket, str_width
3434
from .trace import trace
3535

3636

@@ -421,42 +421,15 @@ def calc_screen(self) -> list[str]:
421421

422422
@staticmethod
423423
def process_prompt(prompt: str) -> tuple[str, int]:
424-
"""Process the prompt.
424+
r"""Return a tuple with the prompt string and its visible length.
425425
426-
This means calculate the length of the prompt. The character \x01
427-
and \x02 are used to bracket ANSI control sequences and need to be
428-
excluded from the length calculation. So also a copy of the prompt
429-
is returned with these control characters removed."""
430-
431-
# The logic below also ignores the length of common escape
432-
# sequences if they were not explicitly within \x01...\x02.
433-
# They are CSI (or ANSI) sequences ( ESC [ ... LETTER )
434-
435-
# wlen from utils already excludes ANSI_ESCAPE_SEQUENCE chars,
436-
# which breaks the logic below so we redefine it here.
437-
def wlen(s: str) -> int:
438-
return sum(str_width(i) for i in s)
439-
440-
out_prompt = ""
441-
l = wlen(prompt)
442-
pos = 0
443-
while True:
444-
s = prompt.find("\x01", pos)
445-
if s == -1:
446-
break
447-
e = prompt.find("\x02", s)
448-
if e == -1:
449-
break
450-
# Found start and end brackets, subtract from string length
451-
l = l - (e - s + 1)
452-
keep = prompt[pos:s]
453-
l -= sum(map(wlen, ANSI_ESCAPE_SEQUENCE.findall(keep)))
454-
out_prompt += keep + prompt[s + 1 : e]
455-
pos = e + 1
456-
keep = prompt[pos:]
457-
l -= sum(map(wlen, ANSI_ESCAPE_SEQUENCE.findall(keep)))
458-
out_prompt += keep
459-
return out_prompt, l
426+
The prompt string has the zero-width brackets recognized by shells
427+
(\x01 and \x02) removed. The length ignores anything between those
428+
brackets as well as any ANSI escape sequences.
429+
"""
430+
out_prompt = unbracket(prompt, including_content=False)
431+
visible_prompt = unbracket(prompt, including_content=True)
432+
return out_prompt, wlen(visible_prompt)
460433

461434
def bow(self, p: int | None = None) -> int:
462435
"""Return the 0-based index of the word break preceding p most

Lib/_pyrepl/utils.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,36 @@
33
import functools
44

55
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
6+
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
7+
ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
68

79

810
@functools.cache
911
def str_width(c: str) -> int:
1012
if ord(c) < 128:
1113
return 1
1214
w = unicodedata.east_asian_width(c)
13-
if w in ('N', 'Na', 'H', 'A'):
15+
if w in ("N", "Na", "H", "A"):
1416
return 1
1517
return 2
1618

1719

1820
def wlen(s: str) -> int:
19-
if len(s) == 1 and s != '\x1a':
21+
if len(s) == 1 and s != "\x1a":
2022
return str_width(s)
2123
length = sum(str_width(i) for i in s)
2224
# remove lengths of any escape sequences
2325
sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
24-
ctrl_z_cnt = s.count('\x1a')
26+
ctrl_z_cnt = s.count("\x1a")
2527
return length - sum(len(i) for i in sequence) + ctrl_z_cnt
28+
29+
30+
def unbracket(s: str, including_content: bool = False) -> str:
    r"""Return `s` with all \001 and \002 marker characters removed.

    \001 and \002 are the zero-width brackets used in prompt strings.

    If `including_content` is False (the default), only the marker
    characters themselves are dropped; the text they enclose is kept.

    If `including_content` is True, every \001...\002 pair is stripped
    together with the text it encloses.  Markers that have no partner are
    then removed on their own, so the result never contains \001 or \002
    in either mode and the two modes agree on which characters outside a
    complete pair remain.
    """
    if including_content:
        # Remove complete pairs along with their content first.  An
        # unpaired marker cannot match the pattern, so it is swept up by
        # the translation below instead of leaking into the result (where
        # it would be counted as a visible column by width calculations).
        s = ZERO_WIDTH_BRACKET.sub("", s)
    return s.translate(ZERO_WIDTH_TRANS)

0 commit comments

Comments
 (0)