-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathregexps.ts
More file actions
131 lines (122 loc) · 3.98 KB
/
regexps.ts
File metadata and controls
131 lines (122 loc) · 3.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/**
* @fileoverview Regular expression utilities including a spec-compliant
* `RegExp.escape` fallback. Provides regex escaping and pattern matching
* helpers.
*/
// Spec-compliant fallback for TC39 RegExp.escape (Node 24+ ships native):
// https://tc39.es/ecma262/#sec-regexp.escape
// https://tc39.es/ecma262/#sec-encodeforregexpescape
// Characters that are escaped by simply prefixing a backslash: the regex
// SyntaxCharacter production together with the solidus (`/`).
const SYNTAX_CHARACTERS: Set<string> = new Set(
  Array.from('^$\\.*+?()[]{}|/'),
)
// Code points that have a dedicated ControlEscape spelling, keyed by code
// point and mapped to the two-character escape text (backslash + letter):
// TAB -> \t, LF -> \n, VT -> \v, FF -> \f, CR -> \r.
const CONTROL_ESCAPES = new Map<number, string>()
  .set(0x09, '\\t')
  .set(0x0a, '\\n')
  .set(0x0b, '\\v')
  .set(0x0c, '\\f')
  .set(0x0d, '\\r')
// The "other punctuators" the spec's EncodeForRegExpEscape operation
// hex-escapes: , - = < > # & ! % : ; @ ~ ' ` and the double quote.
// This set holds only those punctuators; whitespace, line terminators and
// surrogates take the same escaping branch but are detected numerically in
// `isSpecHexEscapeCp`.
const OTHER_PUNCTUATORS = new Set(',-=<>#&!%:;@~\'`"')

/**
 * Report whether a code point must be emitted as a hex escape (`\xHH`, or
 * `\uXXXX` per UTF-16 code unit) rather than verbatim.
 *
 * True for:
 * - the punctuators in `OTHER_PUNCTUATORS`;
 * - LineTerminator: LF, CR, LS (U+2028), PS (U+2029);
 * - WhiteSpace: TAB, VT, FF, ZWNBSP (U+FEFF) and every code point in the
 *   Unicode Space_Separator (Zs) category;
 * - surrogate code points U+D800..U+DFFF.
 *
 * @param cp - Code point to classify. It is passed to
 *   `String.fromCodePoint`, which throws a RangeError for values that are
 *   not valid code points.
 * @returns `true` when the code point belongs to one of the groups above.
 */
function isSpecHexEscapeCp(cp: number): boolean {
  if (OTHER_PUNCTUATORS.has(String.fromCodePoint(cp))) {
    return true
  }
  // LineTerminator.
  if (cp === 0x0a || cp === 0x0d || cp === 0x2028 || cp === 0x2029) {
    return true
  }
  // WhiteSpace members that are not Space_Separator: TAB, VT, FF, ZWNBSP.
  if (cp === 0x09 || cp === 0x0b || cp === 0x0c || cp === 0xfeff) {
    return true
  }
  // Unicode Space_Separator (Zs): SP, NBSP, OGHAM SPACE MARK, EN QUAD through
  // HAIR SPACE, NARROW NO-BREAK SPACE, MEDIUM MATHEMATICAL SPACE and
  // IDEOGRAPHIC SPACE. U+200B (ZERO WIDTH SPACE) is intentionally excluded
  // by the range bound: it is a format character, not a space separator.
  if (
    cp === 0x20 ||
    cp === 0xa0 ||
    cp === 0x1680 ||
    (cp >= 0x2000 && cp <= 0x200a) ||
    cp === 0x202f ||
    cp === 0x205f ||
    cp === 0x3000
  ) {
    return true
  }
  // Surrogate code points (only reachable for unpaired surrogates when the
  // caller iterates a string by code point).
  return cp >= 0xd800 && cp <= 0xdfff
}
/**
 * Format `n` in lowercase hexadecimal, prefixing a zero when the result
 * would otherwise be a single character. Longer results are returned as-is.
 */
function hex2(n: number): string {
  const digits = n.toString(16)
  return digits.length < 2 ? '0' + digits : digits
}
/**
 * Format `n` in lowercase hexadecimal, left-padded with zeros up to four
 * characters. Results already four or more characters long are returned
 * unchanged.
 */
function hex4(n: number): string {
  const digits = n.toString(16)
  const missing = 4 - digits.length
  return missing > 0 ? '0'.repeat(missing) + digits : digits
}
/**
 * Fallback implementation of `RegExp.escape` for runtimes without the
 * native method.
 *
 * The input is walked by code point and each one is encoded as follows,
 * first match wins:
 * 1. An ASCII letter or digit at the very start of the output becomes
 *    `\xHH`, so the result cannot merge with a preceding `\0`..`\9` or `\c`
 *    when spliced into a larger pattern.
 * 2. A member of `SYNTAX_CHARACTERS` gets a backslash prefix.
 * 3. A code point with a `CONTROL_ESCAPES` entry uses that escape text.
 * 4. A code point accepted by `isSpecHexEscapeCp` becomes `\xHH` when it is
 *    at most 0xFF, otherwise one `\uXXXX` per UTF-16 code unit.
 * 5. Anything else is copied verbatim.
 *
 * @param str - The string to escape.
 * @returns The escaped pattern source.
 * @throws TypeError if `str` is not a string. The spec requires this of
 *   `RegExp.escape`; without the guard an untyped caller passing another
 *   iterable (e.g. an array of strings) would get a bogus result instead
 *   of an error.
 */
function escapeRegExpFallback(str: string): string {
  if (typeof str !== 'string') {
    throw new TypeError('escapeRegExp requires a string argument')
  }
  let out = ''
  // The string iterator yields whole code points (surrogate pairs stay
  // together; an unpaired surrogate is yielded on its own).
  for (const char of str) {
    // Non-null: the iterator never yields an empty string.
    const cp = char.codePointAt(0)!
    const isAsciiAlphanumeric =
      (cp >= 0x30 && cp <= 0x39) ||
      (cp >= 0x41 && cp <= 0x5a) ||
      (cp >= 0x61 && cp <= 0x7a)
    // `out === ''` is true only for the first code point, because every
    // branch below appends at least one character.
    if (out === '' && isAsciiAlphanumeric) {
      out += '\\x' + hex2(cp)
      continue
    }
    if (SYNTAX_CHARACTERS.has(char)) {
      out += '\\' + char
      continue
    }
    const controlEscape = CONTROL_ESCAPES.get(cp)
    if (controlEscape !== undefined) {
      out += controlEscape
      continue
    }
    if (!isSpecHexEscapeCp(cp)) {
      // Verbatim.
      out += char
      continue
    }
    if (cp <= 0xff) {
      out += '\\x' + hex2(cp)
      continue
    }
    // Emit one \uXXXX per UTF-16 code unit.
    for (let i = 0; i < char.length; i++) {
      out += '\\u' + hex4(char.charCodeAt(i))
    }
  }
  return out
}
// Feature-detect the native implementation. The property is read through a
// cast so this compiles even when the TypeScript lib typings in use do not
// declare `RegExp.escape`.
const maybeNativeEscape = (RegExp as unknown as { escape?: unknown }).escape

/**
 * Escape special characters in a string so the result can be safely
 * concatenated into any regular-expression Pattern position without
 * altering the meaning of surrounding syntax.
 *
 * Bound to native `RegExp.escape` when the runtime provides it (TC39
 * Stage 4, Node 24+); otherwise falls back to `escapeRegExpFallback`. Both
 * paths are intended to satisfy the spec guarantee that
 * `new RegExp(escapeRegExp(s))` matches exactly the literal string `s`.
 *
 * Reference: https://tc39.es/ecma262/#sec-regexp.escape
 *
 * @param str - The literal text to escape.
 * @returns Pattern source that matches `str` literally.
 *
 * @example
 * ```typescript
 * new RegExp(escapeRegExp('[test]')) // matches literal '[test]'
 * new RegExp('[' + escapeRegExp('a-z') + ']') // matches 'a', '-', or 'z'
 * ```
 */
export const escapeRegExp: (str: string) => string =
  typeof maybeNativeEscape === 'function'
    ? (maybeNativeEscape as (str: string) => string)
    : escapeRegExpFallback