Skip to content

Commit a759b01

Browse files
committed
Optimise import time for textwrap
1 parent 03f6c8e commit a759b01

1 file changed

Lines changed: 36 additions & 22 deletions

File tree

Lib/textwrap.py

Lines changed: 36 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,26 @@
55
# Copyright (C) 2002 Python Software Foundation.
66
# Written by Greg Ward <gward@python.net>
77

8-
import re
9-
108
__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten']
119

10+
11+
class _cached_regex:
    """Non-data descriptor that compiles a regular expression on first use.

    Assign an instance to a class attribute.  The ``re`` module is imported
    and the pattern compiled only when the attribute is first read; the
    descriptor then replaces itself on the class that defines it with the
    compiled pattern, so later lookups (through that class, its subclasses
    or their instances) find the pattern directly.
    """

    def __init__(self, pattern):
        # Pattern source text; compilation is deferred to __get__.
        self.pattern = pattern

    def __set_name__(self, owner, name):
        # Remember the class that holds this descriptor as well as the
        # attribute name.  The ``owner`` passed to __get__ may be a
        # subclass, and caching there would leave the descriptor in place
        # on the defining class.
        self.owner = owner
        self.attr_name = name

    def __get__(self, instance, owner=None):
        # A manual call without an owner returns the descriptor itself.
        if owner is None:
            return self
        import re
        # replace this descriptor with the compiled pattern
        pat = re.compile(self.pattern)
        setattr(self.owner, self.attr_name, pat)
        return pat
26+
27+
1228
# Hardcode the recognized whitespace characters to the US-ASCII
1329
# whitespace characters. The main reason for doing this is that
1430
# some Unicode spaces (like \u00a0) are non-breaking whitespaces.
@@ -73,41 +89,39 @@ class TextWrapper:
7389
# (after stripping out empty strings).
7490
word_punct = r'[\w!"\'&.,?]'
7591
letter = r'[^\d\W]'
76-
whitespace = r'[%s]' % re.escape(_whitespace)
77-
nowhitespace = '[^' + whitespace[1:]
78-
wordsep_re = re.compile(r'''
92+
whitespace = fr'[{_whitespace}]'
93+
no_whitespace = f'[^{_whitespace}]'
94+
wordsep_re = _cached_regex(fr'''(?x)
7995
( # any whitespace
80-
%(ws)s+
96+
{whitespace}+
8197
| # em-dash between words
82-
(?<=%(wp)s) -{2,} (?=\w)
98+
(?<={word_punct}) -{{2,}} (?=\w)
8399
| # word, possibly hyphenated
84-
%(nws)s+? (?:
100+
{no_whitespace}+? (?:
85101
# hyphenated word
86-
-(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
87-
(?= %(lt)s -? %(lt)s)
102+
-(?: (?<={letter}{{2}}-) | (?<={letter}-{letter}-))
103+
(?= {letter} -? {letter})
88104
| # end of word
89-
(?=%(ws)s|\Z)
105+
(?={whitespace}|\Z)
90106
| # em-dash
91-
(?<=%(wp)s) (?=-{2,}\w)
107+
(?<={word_punct}) (?=-{{2,}}\w)
92108
)
93-
)''' % {'wp': word_punct, 'lt': letter,
94-
'ws': whitespace, 'nws': nowhitespace},
95-
re.VERBOSE)
96-
del word_punct, letter, nowhitespace
109+
)''')
110+
del word_punct, letter, no_whitespace
97111

98112
# This less funky little regex just splits on recognized spaces. E.g.
99113
# "Hello there -- you goof-ball, use the -b option!"
100114
# splits into
101115
# Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
102-
wordsep_simple_re = re.compile(r'(%s+)' % whitespace)
116+
wordsep_simple_re = _cached_regex(fr'({whitespace}+)')
103117
del whitespace
104118

105119
# XXX this is not locale- or charset-aware -- string.lowercase
106120
# is US-ASCII only (and therefore English-only)
107-
sentence_end_re = re.compile(r'[a-z]' # lowercase letter
108-
r'[\.\!\?]' # sentence-ending punct.
109-
r'[\"\']?' # optional end-of-quote
110-
r'\Z') # end of chunk
121+
sentence_end_re = _cached_regex(r'[a-z]' # lowercase letter
122+
r'[\.\!\?]' # sentence-ending punct.
123+
r'[\"\']?' # optional end-of-quote
124+
r'\Z') # end of chunk
111125

112126
def __init__(self,
113127
width=70,
@@ -250,7 +264,7 @@ def _wrap_chunks(self, chunks):
250264
"""
251265
lines = []
252266
if self.width <= 0:
253-
raise ValueError("invalid width %r (must be > 0)" % self.width)
267+
raise ValueError(f"invalid width {self.width!r} (must be > 0)")
254268
if self.max_lines is not None:
255269
if self.max_lines > 1:
256270
indent = self.subsequent_indent

0 commit comments

Comments
 (0)