optimize some code paths

atetubou · zzzeek · commit 4279e6538dc3 · 2022-06-30T10:59:44.000-04:00
Optimized some codepaths within the lexer/Python code generation process, improving performance for generation of templates prior to their being cached. Pull request courtesy Takuto Ikuta.

This shows around a 10% performance improvement in our use case (https://crbug.com/1214033#c32).

Closes: #361
Pull-request: #361
Pull-request-sha: bcdee5c
Change-Id: If647f77a52d5745019dcc46f82fd7a928f990757
diff --git a/doc/build/unreleased/361.rst b/doc/build/unreleased/361.rst
@@ -0,0 +1,7 @@
+.. change::
+    :tags: performance
+    :tickets: 361
+
+    Optimized some codepaths within the lexer/Python code generation process,
+    improving performance for generation of templates prior to their being
+    cached. Pull request courtesy Takuto Ikuta.
diff --git a/mako/lexer.py b/mako/lexer.py
@@ -74,12 +74,11 @@ def match_reg(self, reg):
             (start, end) = match.span()
             self.match_position = end + 1 if end == start else end
             self.matched_lineno = self.lineno
-            lines = re.findall(r"\n", self.text[mp : self.match_position])
             cp = mp - 1
-            while cp >= 0 and cp < self.textlength and self.text[cp] != "\n":
-                cp -= 1
+            if cp >= 0 and cp < self.textlength:
+                cp = self.text[: cp + 1].rfind("\n")
             self.matched_charpos = mp - cp
-            self.lineno += len(lines)
+            self.lineno += self.text[mp : self.match_position].count("\n")
         return match
 
     def parse_until_text(self, watch_nesting, *text):
diff --git a/mako/pygen.py b/mako/pygen.py
@@ -43,6 +43,15 @@ def __init__(self, stream):
         # source lines
         self.source_map = {}
 
+        self._re_space_comment = re.compile(r"^\s*#")
+        self._re_space = re.compile(r"^\s*$")
+        self._re_indent = re.compile(r":[ \t]*(?:#.*)?$")
+        self._re_compound = re.compile(r"^\s*(if|try|elif|while|for|with)")
+        self._re_indent_keyword = re.compile(
+            r"^\s*(def|class|else|elif|except|finally)"
+        )
+        self._re_unindentor = re.compile(r"^\s*(else|elif|except|finally).*\:")
+
     def _update_lineno(self, num):
         self.lineno += num
 
@@ -86,8 +95,8 @@ def writeline(self, line):
 
         if (
             line is None
-            or re.match(r"^\s*#", line)
-            or re.match(r"^\s*$", line)
+            or self._re_space_comment.match(line)
+            or self._re_space.match(line)
         ):
             hastext = False
         else:
@@ -121,12 +130,12 @@ def writeline(self, line):
         # note that a line can both decrase (before printing) and
         # then increase (after printing) the indentation level.
 
-        if re.search(r":[ \t]*(?:#.*)?$", line):
+        if self._re_indent.search(line):
             # increment indentation count, and also
             # keep track of what the keyword was that indented us,
             # if it is a python compound statement keyword
             # where we might have to look for an "unindent" keyword
-            match = re.match(r"^\s*(if|try|elif|while|for|with)", line)
+            match = self._re_compound.match(line)
             if match:
                 # its a "compound" keyword, so we will check for "unindentors"
                 indentor = match.group(1)
@@ -137,9 +146,7 @@ def writeline(self, line):
                 # its not a "compound" keyword.  but lets also
                 # test for valid Python keywords that might be indenting us,
                 # else assume its a non-indenting line
-                m2 = re.match(
-                    r"^\s*(def|class|else|elif|except|finally)", line
-                )
+                m2 = self._re_indent_keyword.match(line)
                 if m2:
                     self.indent += 1
                     self.indent_detail.append(indentor)
@@ -167,7 +174,7 @@ def _is_unindentor(self, line):
 
         # if the current line doesnt have one of the "unindentor" keywords,
         # return False
-        match = re.match(r"^\s*(else|elif|except|finally).*\:", line)
+        match = self._re_unindentor.match(line)
         # if True, whitespace matches up, we have a compound indentor,
         # and this line has an unindentor, this
         # is probably good enough
@@ -193,6 +200,9 @@ def _indent_line(self, line, stripspace=""):
 
         stripspace is a string of space that will be truncated from the
         start of the line before indenting."""
+        if stripspace == "":
+            # Fast path optimization.
+            return self.indentstring * self.indent + line
 
         return re.sub(
             r"^%s" % stripspace, self.indentstring * self.indent, line