From 22f498f42964d4a7c17b82ce6d2351efa161b459 Mon Sep 17 00:00:00 2001
From: Harmen Stoppels
Date: Thu, 23 Apr 2026 17:16:50 +0200
Subject: [PATCH] gh-148762: speed up caret match in regexes

Signed-off-by: Harmen Stoppels
---
 .../2026-04-19-23-29-38.gh-issue-148762.HSCJka.rst   |  2 ++
 Modules/_sre/sre_lib.h                               | 12 ++++++++++++
 2 files changed, 14 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Library/2026-04-19-23-29-38.gh-issue-148762.HSCJka.rst

diff --git a/Misc/NEWS.d/next/Library/2026-04-19-23-29-38.gh-issue-148762.HSCJka.rst b/Misc/NEWS.d/next/Library/2026-04-19-23-29-38.gh-issue-148762.HSCJka.rst
new file mode 100644
index 00000000000000..e7e3de7a96cbd3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-04-19-23-29-38.gh-issue-148762.HSCJka.rst
@@ -0,0 +1,2 @@
+Multiline regexes starting with a caret, such as ``re.compile("^foo",
+re.MULTILINE)``, now run significantly faster.
diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h
index df377905bfae0d..70de4cccefd989 100644
--- a/Modules/_sre/sre_lib.h
+++ b/Modules/_sre/sre_lib.h
@@ -1855,6 +1855,18 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
             return 0;
         }
         while (status == 0 && ptr < end) {
+            if (pattern[0] == SRE_OP_AT &&
+                pattern[1] == SRE_AT_BEGINNING_LINE &&
+                !SRE_IS_LINEBREAK((int) ptr[-1]))
+            {
+                /* fast-forward to the next newline character */
+                while (ptr < end && !SRE_IS_LINEBREAK((int) *ptr)) {
+                    ptr++;
+                }
+                if (ptr >= end) {
+                    return 0;
+                }
+            }
             ptr++;
             RESET_CAPTURE_GROUP();
             TRACE(("|%p|%p|SEARCH\n", pattern, ptr));