Skip to content

Commit b5ac84b

Browse files
zzzeek authored and Gerrit Code Review committed
Merge "fix tag regexp to match quoted groups correctly" into main
2 parents dbbaad3 + 9257602 commit b5ac84b

3 files changed

Lines changed: 34 additions & 8 deletions

File tree

doc/build/unreleased/366.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
.. change::
2+
:tags: bug, lexer
3+
:tickets: 366
4+
5+
Fixed issue in lexer where the regexp used to match tags would not
6+
correctly interpret quoted sections individually. While this parsing issue
7+
still produced the same expected tag structure later on, the mis-handling
8+
of quoted sections was also subject to a regexp crash if a tag had a large
9+
number of quotes within its quoted sections.

mako/lexer.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -272,20 +272,24 @@ def parse(self):
272272
return self.template
273273

274274
def match_tag_start(self):
275-
match = self.match(
276-
r"""
275+
reg = r"""
277276
\<% # opening tag
278277
279278
([\w\.\:]+) # keyword
280279
281-
((?:\s+\w+|\s*=\s*|".*?"|'.*?')*) # attrname, = \
280+
((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*) # attrname, = \
282281
# sign, string expression
282+
# comma is for backwards compat
283+
# identified in #366
283284
284285
\s* # more whitespace
285286
286287
(/)?> # closing
287288
288-
""",
289+
"""
290+
291+
match = self.match(
292+
reg,
289293
re.I | re.S | re.X,
290294
)
291295

test/test_lexer.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import re
22

3+
import pytest
4+
35
from mako import compat
46
from mako import exceptions
57
from mako import parsetree
@@ -146,6 +148,10 @@ def test_noexpr_allowed(self):
146148
"""
147149
assert_raises(exceptions.CompileException, Lexer(template).parse)
148150

151+
def test_tag_many_quotes(self):
152+
template = "<%0" + '"' * 3000
153+
assert_raises(exceptions.SyntaxException, Lexer(template).parse)
154+
149155
def test_unmatched_tag(self):
150156
template = """
151157
<%namespace name="bar">
@@ -432,9 +438,16 @@ def test_expr_in_attribute(self):
432438
),
433439
)
434440

435-
def test_pagetag(self):
436-
template = """
437-
<%page cached="True", args="a, b"/>
441+
@pytest.mark.parametrize("comma,numchars", [(",", 48), ("", 47)])
442+
def test_pagetag(self, comma, numchars):
443+
# note that the comma here looks like:
444+
# <%page cached="True", args="a, b"/>
445+
# that's what this test has looked like for decades, however, the
446+
# comma there is not actually the right syntax. When issue #366
447+
# was fixed, the reg was altered to accommodate for this comma to allow
448+
# backwards compat
449+
template = f"""
450+
<%page cached="True"{comma} args="a, b"/>
438451
439452
some template
440453
"""
@@ -453,7 +466,7 @@ def test_pagetag(self):
453466
454467
some template
455468
""",
456-
(2, 48),
469+
(2, numchars),
457470
),
458471
],
459472
),

0 commit comments

Comments
 (0)