Skip to content

Commit 17665e2

Browse files
committed
refactor: Register headings with a Markdown processor rather than the on_page_content hook and the table of contents
1 parent 4664b97 commit 17665e2

File tree

5 files changed: +89 additions, -18 deletions

src/mkdocs_autorefs/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
AutorefsExtension,
1515
AutorefsHookInterface,
1616
AutorefsInlineProcessor,
17+
HeadingScannerTreeProcessor,
1718
fix_ref,
1819
fix_refs,
1920
relative_url,
@@ -31,6 +32,7 @@
3132
"Backlink",
3233
"BacklinkCrumb",
3334
"BacklinksTreeProcessor",
35+
"HeadingScannerTreeProcessor",
3436
"fix_ref",
3537
"fix_refs",
3638
"relative_url",

src/mkdocs_autorefs/_internal/plugin.py

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -211,35 +211,32 @@ def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: #
211211
212212
Returns:
213213
The same Markdown. We only use this hook to keep a reference to the current page URL,
214-
used during Markdown conversion by the anchor scanner tree processor.
214+
used during Markdown conversion by the anchor/heading scanner tree processors.
215215
"""
216216
# YORE: Bump 2: Remove line.
217217
self._url_to_page[page.url] = page
218218
self.current_page = page
219219
return markdown
220220

221221
def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002
222-
"""Map anchors to URLs.
222+
"""Register breadcrumbs.
223223
224224
Hook for the [`on_page_content` event](https://www.mkdocs.org/user-guide/plugins/#on_page_content).
225-
In this hook, we map the IDs of every anchor found in the table of contents to the anchors absolute URLs.
226-
This mapping will be used later to fix unresolved reference of the form `[title][identifier]` or
227-
`[identifier][]`.
225+
In this hook, we register breadcrumbs for each heading on each page.
226+
These breadcrumbs are used to provide contextual information in backlinks.
228227
229228
Arguments:
230229
html: HTML converted from Markdown.
231230
page: The related MkDocs page instance.
232231
kwargs: Additional arguments passed by MkDocs.
233232
234233
Returns:
235-
The same HTML. We only use this hook to map anchors to URLs.
234+
The same HTML.
236235
"""
237236
self.current_page = page
238-
# Collect `std`-domain URLs.
239-
if self.scan_toc:
240-
_log.debug("Mapping identifiers to URLs for page %s", page.file.src_path)
237+
if self.record_backlinks:
241238
for item in page.toc.items:
242-
self.map_urls(page, item)
239+
self._register_breadcrumbs(page, item)
243240
return html
244241

245242
@event_priority(-50) # Late, after mkdocstrings has finished loading inventories.
@@ -299,30 +296,43 @@ def on_env(self, env: Environment, /, *, config: MkDocsConfig, files: Files) ->
299296
# ----------------------------------------------------------------------- #
300297
# Utilities #
301298
# ----------------------------------------------------------------------- #
302-
# TODO: Maybe stop exposing this method in the future.
299+
# YORE: Bump 2: Remove block.
303300
def map_urls(self, page: Page, anchor: AnchorLink) -> None:
304-
"""Recurse on every anchor to map its ID to its absolute URL.
301+
"""Deprecated. Recurse on every anchor to map its ID to its absolute URL.
302+
303+
This method is deprecated and will be removed in a future release.
304+
Please use the `register_anchor` or `register_url` methods instead.
305305
306306
This method populates `self._primary_url_map` by side-effect.
307307
308308
Arguments:
309309
page: The page containing the anchors.
310310
anchor: The anchor to process and to recurse on.
311311
"""
312-
return self._map_urls(page, anchor)
312+
warn(
313+
"The `map_urls` method is deprecated and will be removed in a future release. "
314+
"Please use the `register_anchor` or `register_url` methods instead.",
315+
DeprecationWarning,
316+
stacklevel=2,
317+
)
318+
self._map_urls(page, anchor)
313319

314-
def _map_urls(self, page: Page, anchor: AnchorLink, parent: BacklinkCrumb | None = None) -> None:
315-
# YORE: Bump 2: Remove block.
320+
# YORE: Bump 2: Remove block.
321+
def _map_urls(self, page: Page | str, anchor: AnchorLink) -> None:
316322
if isinstance(page, str):
317323
try:
318324
page = self._url_to_page[page]
319325
except KeyError:
320326
page = self.current_page # type: ignore[assignment]
321327

322-
self.register_anchor(page, anchor.id, title=anchor.title, primary=True)
328+
self.register_anchor(page, anchor.id, title=anchor.title, primary=True) # type: ignore[arg-type]
329+
self._register_breadcrumbs(page, anchor) # type: ignore[arg-type]
330+
331+
def _register_breadcrumbs(self, page: Page, anchor: AnchorLink, parent: BacklinkCrumb | None = None) -> None:
332+
# Getting a breadcrumb has a side-effect of registering it in the breadcrumbs map.
323333
breadcrumb = self._get_breadcrumb(page, anchor, parent)
324334
for child in anchor.children:
325-
self._map_urls(page, child, breadcrumb)
335+
self._register_breadcrumbs(page, child, breadcrumb)
326336

327337
def _get_breadcrumb(
328338
self,

src/mkdocs_autorefs/_internal/references.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,45 @@ def _scan_anchors(self, parent: Element, pending_anchors: _PendingAnchors, last_
282282
self.run(el)
283283

284284

285+
class HeadingScannerTreeProcessor(Treeprocessor):
    """Tree processor to scan and register HTML headings.

    Every heading element (`h1` to `h6`) that carries an `id` attribute is registered
    on the plugin, for the plugin's current page, through its `register_anchor` method.
    Elements that are not headings are searched recursively for nested headings.
    """

    name: str = "mkdocs-autorefs-headings-scanner"
    """The name of the tree processor."""

    _htags: ClassVar[set[str]] = {"h1", "h2", "h3", "h4", "h5", "h6"}

    # Default CSS class of the permalink anchor appended to headings by the `toc` extension.
    # NOTE(review): a custom `permalink_class` is not detected here — confirm whether it should be.
    _permalink_class: ClassVar[str] = "headerlink"

    def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None:
        """Initialize the tree processor.

        Arguments:
            plugin: A reference to the autorefs plugin, to use its `register_anchor` method.
            md: The Markdown instance, forwarded as is to the parent tree processor.
        """
        super().__init__(md)
        self._plugin = plugin

    def run(self, root: Element) -> None:
        """Run the tree processor.

        Nothing is scanned when the plugin has no current page.

        Arguments:
            root: The root element of the tree.
        """
        if self._plugin.current_page is not None:
            self._scan_headings(root)

    def _scan_headings(self, parent: Element) -> None:
        """Register each heading with an ID found under `parent`, recursing into other elements."""
        for el in parent:
            if el.tag not in self._htags:
                self._scan_headings(el)
                continue
            # Headings without an ID cannot be linked to, so they are ignored.
            if h_id := el.get("id"):
                self._plugin.register_anchor(
                    self._plugin.current_page,  # type: ignore[arg-type]
                    h_id,
                    title=self._heading_title(el),
                )

    def _heading_title(self, heading: Element) -> str | None:
        """Return the text of a heading, including the text of its inline children.

        `heading.text` alone only holds the text located before the first child element,
        so a heading starting with inline markup (code, emphasis, a link wrapping its
        whole contents) would otherwise yield an empty or truncated title.

        Arguments:
            heading: The heading element.

        Returns:
            The stripped text of the heading, or `None` when it has no text.
        """
        title = "".join(self._iter_text(heading)).strip()
        return title or None

    def _iter_text(self, el: Element) -> Iterator[str]:
        """Yield the text fragments of an element in document order, skipping permalink anchors."""
        if el.text:
            yield el.text
        for child in el:
            is_permalink = child.tag == "a" and self._permalink_class in (child.get("class") or "").split()
            # The permalink symbol is not part of the title, but the text following it still is.
            if not is_permalink:
                yield from self._iter_text(child)
            if child.tail:
                yield child.tail
323+
285324
class AutorefsExtension(Extension):
286325
"""Markdown extension that transforms unresolved references into auto-references.
287326
@@ -323,6 +362,13 @@ def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent me
323362
priority=168, # Right after markdown.inlinepatterns.ReferenceInlineProcessor
324363
)
325364
if self.plugin is not None:
365+
# Scan headings to register them.
366+
if self.plugin.scan_toc:
367+
md.treeprocessors.register(
368+
HeadingScannerTreeProcessor(self.plugin, md),
369+
HeadingScannerTreeProcessor.name,
370+
priority=0,
371+
)
326372
# Markdown anchors require the `attr_list` extension.
327373
if self.plugin.scan_toc and "attr_list" in md.treeprocessors:
328374
_log_enabling_markdown_anchors()

tests/test_backlinks.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,10 @@ def test_get_backlinks() -> None:
2626
"""Check that backlinks can be retrieved."""
2727
plugin = AutorefsPlugin()
2828
plugin.record_backlinks = True
29-
plugin.map_urls(create_page("foo.html"), create_anchor_link("Foo", "foo"))
29+
page = create_page("foo.html")
30+
anchor = create_anchor_link("Foo", "foo")
31+
plugin.register_anchor(page, anchor.id, title=anchor.title, primary=True)
32+
plugin._register_breadcrumbs(page, anchor)
3033
plugin._primary_url_map["bar"] = ["bar.html#bar"]
3134
plugin._record_backlink("bar", "referenced-by", "foo", "foo.html")
3235
assert plugin.get_backlinks("bar", from_url="") == {

tests/test_references.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,18 +344,25 @@ def test_register_markdown_anchors() -> None:
344344
)
345345
assert plugin._primary_url_map == {
346346
"foo": ["page#heading-foo"],
347+
"heading-foo": ["page#heading-foo"],
347348
"bar": ["page#bar"],
349+
"heading-bar": ["page#heading-bar"],
348350
"alias1": ["page#heading-bar"],
349351
"alias2": ["page#heading-bar"],
350352
"alias3": ["page#alias3"],
351353
"alias4": ["page#heading-baz"],
354+
"heading-baz": ["page#heading-baz"],
352355
"alias5": ["page#alias5"],
353356
"alias6": ["page#alias6"],
357+
"heading-more1": ["page#heading-more1"],
354358
"alias7": ["page#alias7"],
355359
"alias8": ["page#alias8"],
356360
"alias9": ["page#heading-custom2"],
361+
"heading-custom2": ["page#heading-custom2"],
357362
"alias10": ["page#alias10"],
358363
"aliasSame": ["page#same-heading-1", "page#same-heading-2"],
364+
"same-heading-1": ["page#same-heading-1"],
365+
"same-heading-2": ["page#same-heading-2"],
359366
}
360367

361368

@@ -380,6 +387,9 @@ def test_register_markdown_anchors_with_admonition() -> None:
380387
),
381388
)
382389
assert plugin._primary_url_map == {
390+
"heading-foo": ["page#heading-foo"],
391+
"heading-bar": ["page#heading-bar"],
392+
"heading-baz": ["page#heading-baz"],
383393
"alias1": ["page#alias1"],
384394
"alias2": ["page#heading-bar"],
385395
"alias3": ["page#alias3"],

0 commit comments

Comments
 (0)