1717import markupsafe
1818from markdown .core import Markdown
1919from markdown .extensions import Extension
20+ from markdown .extensions .toc import slugify
2021from markdown .inlinepatterns import REFERENCE_RE , ReferenceInlineProcessor
2122from markdown .treeprocessors import Treeprocessor
2223from markdown .util import HTML_PLACEHOLDER_RE , INLINE_PLACEHOLDER_RE
2324
2425if TYPE_CHECKING :
26+ from collections .abc import Iterable
2527 from pathlib import Path
2628 from re import Match
2729
@@ -120,7 +122,6 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: # noqa: D107
120122
121123 # Code based on
122124 # https://github.com/Python-Markdown/markdown/blob/8e7528fa5c98bf4652deb13206d6e6241d61630b/markdown/inlinepatterns.py#L780
123-
124125 def handleMatch (self , m : Match [str ], data : str ) -> tuple [Element | None , int | None , int | None ]: # type: ignore[override] # noqa: N802
125126 """Handle an element that matched.
126127
@@ -135,19 +136,19 @@ def handleMatch(self, m: Match[str], data: str) -> tuple[Element | None, int | N
135136 if not handled :
136137 return None , None , None
137138
138- identifier , end , handled = self .evalId (data , index , text )
139+ identifier , slug , end , handled = self ._eval_id (data , index , text )
139140 if not handled or identifier is None :
140141 return None , None , None
141142
142- if re .search (r"[\x00-\x1f]" , identifier ):
143+ if slug is None and re .search (r"[\x00-\x1f]" , identifier ):
143144 # Do nothing if the matched reference contains control characters (from 0 to 31 included).
144145 # Specifically `\x01` is used by Python-Markdown HTML stash when there's inline formatting,
145146 # but references with Markdown formatting are not possible anyway.
146147 return None , m .start (0 ), end
147148
148- return self ._make_tag (identifier , text ), m .start (0 ), end
149+ return self ._make_tag (identifier , text , slug = slug ), m .start (0 ), end
149150
150- def evalId (self , data : str , index : int , text : str ) -> tuple [str | None , int , bool ]: # noqa: N802 (parent's casing)
151+ def _eval_id (self , data : str , index : int , text : str ) -> tuple [str | None , str | None , int , bool ]:
151152 """Evaluate the id portion of `[ref][id]`.
152153
153154 If `[ref][]` use `[ref]`.
@@ -158,23 +159,28 @@ def evalId(self, data: str, index: int, text: str) -> tuple[str | None, int, boo
158159 text: The text to use when no identifier.
159160
160161 Returns:
161- A tuple containing the identifier, its end position, and whether it matched.
162+ A tuple containing the identifier, its optional slug, its end position, and whether it matched.
162163 """
163164 m = self .RE_LINK .match (data , pos = index )
164165 if not m :
165- return None , index , False
166+ return None , None , index , False
166167
167- identifier = m .group (1 )
168- if not identifier :
168+ if identifier := m .group (1 ):
169+ # An identifier was provided, match it exactly (later).
170+ slug = None
171+ else :
172+ # Only a title was provided, use it as identifier.
169173 identifier = text
170- # Allow the entire content to be one placeholder, with the intent of catching things like [`Foo`][].
171- # It doesn't catch [*Foo*][] though, just due to the priority order.
172- # https://github.com/Python-Markdown/markdown/blob/1858c1b601ead62ed49646ae0d99298f41b1a271/markdown/inlinepatterns.py#L78
174+
175+ # Catch single stash entries, like the result of [`Foo`][].
173176 if match := INLINE_PLACEHOLDER_RE .fullmatch (identifier ):
174177 stashed_nodes : dict [str , Element | str ] = self .md .treeprocessors ["inline" ].stashed_nodes # type: ignore[attr-defined]
175178 el = stashed_nodes .get (match [1 ])
176179 if isinstance (el , Element ) and el .tag == "code" :
180+ # The title was wrapped in backticks, we only keep the content,
181+ # and tell autorefs to match the identifier exactly.
177182 identifier = "" .join (el .itertext ())
183+ slug = None
178184 # Special case: allow pymdownx.inlinehilite raw <code> snippets but strip them back to unhighlighted.
179185 if match := HTML_PLACEHOLDER_RE .fullmatch (identifier ):
180186 stash_index = int (match .group (1 ))
@@ -183,9 +189,9 @@ def evalId(self, data: str, index: int, text: str) -> tuple[str | None, int, boo
183189 self .md .htmlStash .rawHtmlBlocks [stash_index ] = escape (identifier )
184190
185191 end = m .end (0 )
186- return identifier , end , True
192+ return identifier , slug , end , True
187193
188- def _make_tag (self , identifier : str , text : str ) -> Element :
194+ def _make_tag (self , identifier : str , text : str , * , slug : str | None = None ) -> Element :
189195 """Create a tag that can be matched by `AUTO_REF_RE`.
190196
191197 Arguments:
@@ -201,6 +207,8 @@ def _make_tag(self, identifier: str, text: str) -> Element:
201207 el .attrib .update (self .hook .get_context ().as_dict ())
202208 el .set ("identifier" , identifier )
203209 el .text = text
210+ if slug :
211+ el .attrib ["slug" ] = slug
204212 return el
205213
206214
@@ -300,6 +308,7 @@ class _AutorefsAttrs(dict):
300308 "origin" ,
301309 "filepath" ,
302310 "lineno" ,
311+ "slug" ,
303312 }
304313
305314 @property
@@ -337,6 +346,15 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
337346_html_attrs_parser = _HTMLAttrsParser ()
338347
339348
349+ def _find_url (identifiers : Iterable [str ], url_mapper : Callable [[str ], str ]) -> str :
350+ for identifier in identifiers :
351+ try :
352+ return url_mapper (identifier )
353+ except KeyError :
354+ pass
355+ raise KeyError (f"None of the identifiers { identifiers } were found" )
356+
357+
340358def fix_ref (
341359 url_mapper : Callable [[str ], str ],
342360 unmapped : list [tuple [str , AutorefsHookInterface .Context | None ]],
@@ -363,11 +381,14 @@ def inner(match: Match) -> str:
363381 title = match ["title" ]
364382 attrs = _html_attrs_parser .parse (f"<a { match ['attrs' ]} >" )
365383 identifier : str = attrs ["identifier" ]
384+ slug = attrs .get ("slug" , None )
366385 optional = "optional" in attrs
367386 hover = "hover" in attrs
368387
388+ identifiers = (identifier , slug ) if slug else (identifier ,)
389+
369390 try :
370- url = url_mapper ( unescape ( identifier ) )
391+ url = _find_url ( identifiers , url_mapper )
371392 except KeyError :
372393 if optional :
373394 if hover :
@@ -376,6 +397,8 @@ def inner(match: Match) -> str:
376397 unmapped .append ((identifier , attrs .context ))
377398 if title == identifier :
378399 return f"[{ identifier } ][]"
400+ if title == f"<code>{ identifier } </code>" and not slug :
401+ return f"[<code>{ identifier } </code>][]"
379402 return f"[{ title } ][{ identifier } ]"
380403
381404 parsed = urlsplit (url )
0 commit comments