Skip to content

Commit 9257602

Browse files
committed
fix tag regexp to match quoted groups correctly
Fixed issue in lexer where the regexp used to match tags would not correctly interpret quoted sections individually. While this parsing issue still produced the same expected tag structure later on, the mishandling of quoted sections was also subject to a regexp crash if a tag had a large number of quotes within its quoted sections.

Fixes: #366
Change-Id: I74e0d71ff7f419970711a7cd51adcf1bb90a44c0
1 parent 7c5b28a commit 9257602

3 files changed

Lines changed: 34 additions & 8 deletions

File tree

doc/build/unreleased/366.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
.. change::
2+
:tags: bug, lexer
3+
:tickets: 366
4+
5+
Fixed issue in lexer where the regexp used to match tags would not
6+
correctly interpret quoted sections individually. While this parsing issue
7+
still produced the same expected tag structure later on, the mis-handling
8+
of quoted sections was also subject to a regexp crash if a tag had a large
9+
number of quotes within its quoted sections.

mako/lexer.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -272,20 +272,24 @@ def parse(self):
272272
return self.template
273273

274274
def match_tag_start(self):
275-
match = self.match(
276-
r"""
275+
reg = r"""
277276
\<% # opening tag
278277
279278
([\w\.\:]+) # keyword
280279
281-
((?:\s+\w+|\s*=\s*|".*?"|'.*?')*) # attrname, = \
280+
((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*) # attrname, = \
282281
# sign, string expression
282+
# comma is for backwards compat
283+
# identified in #366
283284
284285
\s* # more whitespace
285286
286287
(/)?> # closing
287288
288-
""",
289+
"""
290+
291+
match = self.match(
292+
reg,
289293
re.I | re.S | re.X,
290294
)
291295

test/test_lexer.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import re
22

3+
import pytest
4+
35
from mako import compat
46
from mako import exceptions
57
from mako import parsetree
@@ -146,6 +148,10 @@ def test_noexpr_allowed(self):
146148
"""
147149
assert_raises(exceptions.CompileException, Lexer(template).parse)
148150

151+
def test_tag_many_quotes(self):
152+
template = "<%0" + '"' * 3000
153+
assert_raises(exceptions.SyntaxException, Lexer(template).parse)
154+
149155
def test_unmatched_tag(self):
150156
template = """
151157
<%namespace name="bar">
@@ -432,9 +438,16 @@ def test_expr_in_attribute(self):
432438
),
433439
)
434440

435-
def test_pagetag(self):
436-
template = """
437-
<%page cached="True", args="a, b"/>
441+
@pytest.mark.parametrize("comma,numchars", [(",", 48), ("", 47)])
442+
def test_pagetag(self, comma, numchars):
443+
# note that the comma here looks like:
444+
# <%page cached="True", args="a, b"/>
445+
# that's what this test has looked like for decades, however, the
446+
# comma there is not actually the right syntax. When issue #366
447+
# was fixed, the reg was altered to accommodate this comma to allow
448+
# backwards compat
449+
template = f"""
450+
<%page cached="True"{comma} args="a, b"/>
438451
439452
some template
440453
"""
@@ -453,7 +466,7 @@ def test_pagetag(self):
453466
454467
some template
455468
""",
456-
(2, 48),
469+
(2, numchars),
457470
),
458471
],
459472
),

0 commit comments

Comments
 (0)