|
5 | 5 | # Copyright (C) 2002 Python Software Foundation. |
6 | 6 | # Written by Greg Ward <gward@python.net> |
7 | 7 |
|
8 | | -import re |
9 | | - |
10 | 8 | __all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten'] |
11 | 9 |
|
| 10 | + |
| 11 | +class _cached_regex: |
| 12 | + def __init__(self, pattern): |
| 13 | + self.pattern = pattern |
| 14 | + |
| 15 | + def __set_name__(self, owner, name): |
| 16 | + self.attr_name = name |
| 17 | + |
| 18 | + def __get__(self, instance, owner=None): |
| 19 | + if owner is None: |
| 20 | + return self |
| 21 | + import re |
| 22 | + # replace this descriptor with the compiled pattern |
| 23 | + pat = re.compile(self.pattern) |
| 24 | + setattr(owner, self.attr_name, pat) |
| 25 | + return pat |
| 26 | + |
| 27 | + |
12 | 28 | # Hardcode the recognized whitespace characters to the US-ASCII |
13 | 29 | # whitespace characters. The main reason for doing this is that |
14 | 30 | # some Unicode spaces (like \u00a0) are non-breaking whitespaces. |
@@ -73,41 +89,39 @@ class TextWrapper: |
73 | 89 | # (after stripping out empty strings). |
74 | 90 | word_punct = r'[\w!"\'&.,?]' |
75 | 91 | letter = r'[^\d\W]' |
76 | | - whitespace = r'[%s]' % re.escape(_whitespace) |
77 | | - nowhitespace = '[^' + whitespace[1:] |
78 | | - wordsep_re = re.compile(r''' |
| 92 | + whitespace = fr'[{_whitespace}]' |
| 93 | + no_whitespace = f'[^{_whitespace}]' |
| 94 | + wordsep_re = _cached_regex(fr'''(?x) |
79 | 95 | ( # any whitespace |
80 | | - %(ws)s+ |
| 96 | + {whitespace}+ |
81 | 97 | | # em-dash between words |
82 | | - (?<=%(wp)s) -{2,} (?=\w) |
| 98 | + (?<={word_punct}) -{{2,}} (?=\w) |
83 | 99 | | # word, possibly hyphenated |
84 | | - %(nws)s+? (?: |
| 100 | + {no_whitespace}+? (?: |
85 | 101 | # hyphenated word |
86 | | - -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-)) |
87 | | - (?= %(lt)s -? %(lt)s) |
| 102 | + -(?: (?<={letter}{{2}}-) | (?<={letter}-{letter}-)) |
| 103 | + (?= {letter} -? {letter}) |
88 | 104 | | # end of word |
89 | | - (?=%(ws)s|\Z) |
| 105 | + (?={whitespace}|\Z) |
90 | 106 | | # em-dash |
91 | | - (?<=%(wp)s) (?=-{2,}\w) |
| 107 | + (?<={word_punct}) (?=-{{2,}}\w) |
92 | 108 | ) |
93 | | - )''' % {'wp': word_punct, 'lt': letter, |
94 | | - 'ws': whitespace, 'nws': nowhitespace}, |
95 | | - re.VERBOSE) |
96 | | - del word_punct, letter, nowhitespace |
| 109 | + )''') |
| 110 | + del word_punct, letter, no_whitespace |
97 | 111 |
|
98 | 112 | # This less funky little regex just splits on recognized spaces. E.g. |
99 | 113 | # "Hello there -- you goof-ball, use the -b option!" |
100 | 114 | # splits into |
101 | 115 | # Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/ |
102 | | - wordsep_simple_re = re.compile(r'(%s+)' % whitespace) |
| 116 | + wordsep_simple_re = _cached_regex(fr'({whitespace}+)') |
103 | 117 | del whitespace |
104 | 118 |
|
105 | 119 | # XXX this is not locale- or charset-aware -- string.lowercase |
106 | 120 | # is US-ASCII only (and therefore English-only) |
107 | | - sentence_end_re = re.compile(r'[a-z]' # lowercase letter |
108 | | - r'[\.\!\?]' # sentence-ending punct. |
109 | | - r'[\"\']?' # optional end-of-quote |
110 | | - r'\Z') # end of chunk |
| 121 | + sentence_end_re = _cached_regex(r'[a-z]' # lowercase letter |
| 122 | + r'[\.\!\?]' # sentence-ending punct. |
| 123 | + r'[\"\']?' # optional end-of-quote |
| 124 | + r'\Z') # end of chunk |
111 | 125 |
|
112 | 126 | def __init__(self, |
113 | 127 | width=70, |
@@ -250,7 +264,7 @@ def _wrap_chunks(self, chunks): |
250 | 264 | """ |
251 | 265 | lines = [] |
252 | 266 | if self.width <= 0: |
253 | | - raise ValueError("invalid width %r (must be > 0)" % self.width) |
| 267 | + raise ValueError(f"invalid width {self.width!r} (must be > 0)") |
254 | 268 | if self.max_lines is not None: |
255 | 269 | if self.max_lines > 1: |
256 | 270 | indent = self.subsequent_indent |
|
0 commit comments