Skip to content

Commit 559c723

Browse files
committed
feat: Separate URLs in two groups, primary and secondary
This will be useful in mkdocstrings, where we want to register URLs for all aliases of a rendered object's identifier early in the process, so that we can drop the fallback mechanism in autorefs. Primary URLs will take precedence when resolving cross-references, to avoid logging warnings about multiple URLs found.

For example:

- Object `a.b.c.d` has aliases `a.b.d` and `a.d`
- Object `a.b.c.d` is rendered.
- We register `a.b.c.d` -> page#a.b.c.d as primary
- We register `a.b.d` -> page#a.b.c.d as secondary
- We register `a.d` -> page#a.b.c.d as secondary
- Later, if `a.b.d` or `a.d` are rendered, we will register primary and secondary URLs the same way
- This way we are sure that each of `a.b.c.d`, `a.b.d` and `a.d` will link to its primary URL if it has one, or to its secondary URL otherwise

Related-to-issue-61: #61
1 parent 88f1dc9 commit 559c723

File tree

4 files changed

+70
-23
lines changed

4 files changed

+70
-23
lines changed

src/mkdocs_autorefs/plugin.py

Lines changed: 55 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -81,23 +81,54 @@ class AutorefsPlugin(BasePlugin[AutorefsConfig]):
8181
def __init__(self) -> None:
8282
"""Initialize the object."""
8383
super().__init__()
84-
self._url_map: dict[str, list[str]] = {}
84+
85+
# The plugin uses three URL maps, one for "primary" URLs, one for "secondary" URLs,
86+
# and one for "absolute" URLs.
87+
#
88+
# - A primary URL is an identifier that links to a specific anchor on a page.
89+
# - A secondary URL is an alias of an identifier that links to the same anchor as the identifier's primary URL.
90+
# Primary URLs with these aliases as identifiers may or may not be rendered later.
91+
# - An absolute URL is an identifier that links to an external resource.
92+
# These URLs are typically registered by mkdocstrings when loading object inventories.
93+
#
94+
# For example, mkdocstrings registers a primary URL for each heading rendered in a page.
95+
# Then, for each alias of this heading's identifier, it registers a secondary URL.
96+
#
97+
# We need to keep track of whether an identifier is primary or secondary,
98+
# to give primary URLs precedence over secondary ones when resolving cross-references.
99+
# We wouldn't want to log a warning if there is a single primary URL and one or more secondary URLs,
100+
# instead we want to use the primary URL without any warning.
101+
#
102+
# - A single primary URL mapped to an identifier? Use it.
103+
# - Multiple primary URLs mapped to an identifier? Use the first one, or closest one if configured as such.
104+
# - No primary URL mapped to an identifier, but a secondary URL mapped? Use it.
105+
# - Multiple secondary URLs mapped to an identifier? Use the first one, or closest one if configured as such.
106+
# - No secondary URL mapped to an identifier? Try using absolute URLs
107+
# (typically registered by loading inventories in mkdocstrings).
108+
#
109+
# This logic unfolds in `_get_item_url`.
110+
self._primary_url_map: dict[str, list[str]] = {}
111+
self._secondary_url_map: dict[str, list[str]] = {}
85112
self._abs_url_map: dict[str, str] = {}
113+
86114
self.get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None
87115

88-
def register_anchor(self, page: str, identifier: str, anchor: str | None = None) -> None:
116+
def register_anchor(self, page: str, identifier: str, anchor: str | None = None, *, primary: bool = True) -> None:
89117
"""Register that an anchor corresponding to an identifier was encountered when rendering the page.
90118
91119
Arguments:
92120
page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'`
93-
identifier: The HTML anchor (without '#') as a string.
121+
identifier: The identifier to register.
122+
anchor: The anchor on the page, without `#`. If not provided, defaults to the identifier.
123+
primary: Whether this anchor is the primary one for the identifier.
94124
"""
95125
page_anchor = f"{page}#{anchor or identifier}"
96-
if identifier in self._url_map:
97-
if page_anchor not in self._url_map[identifier]:
98-
self._url_map[identifier].append(page_anchor)
126+
url_map = self._primary_url_map if primary else self._secondary_url_map
127+
if identifier in url_map:
128+
if page_anchor not in url_map[identifier]:
129+
url_map[identifier].append(page_anchor)
99130
else:
100-
self._url_map[identifier] = [page_anchor]
131+
url_map[identifier] = [page_anchor]
101132

102133
def register_url(self, identifier: str, url: str) -> None:
103134
"""Register that the identifier should be turned into a link to this URL.
@@ -109,12 +140,13 @@ def register_url(self, identifier: str, url: str) -> None:
109140
self._abs_url_map[identifier] = url
110141

111142
@staticmethod
112-
def _get_closest_url(from_url: str, urls: list[str]) -> str:
143+
def _get_closest_url(from_url: str, urls: list[str], qualifier: str) -> str:
113144
"""Return the closest URL to the current page.
114145
115146
Arguments:
116147
from_url: The URL of the base page, from which we link towards the targeted pages.
117148
urls: A list of URLs to choose from.
149+
qualifier: The type of URLs we are choosing from.
118150
119151
Returns:
120152
The closest URL to the current page.
@@ -130,8 +162,9 @@ def _get_closest_url(from_url: str, urls: list[str]) -> str:
130162

131163
if not candidates:
132164
log.warning(
133-
"Could not find closest URL (from %s, candidates: %s). "
165+
"Could not find closest %s URL (from %s, candidates: %s). "
134166
"Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
167+
qualifier,
135168
from_url,
136169
urls,
137170
)
@@ -141,14 +174,20 @@ def _get_closest_url(from_url: str, urls: list[str]) -> str:
141174
log.debug("Closest URL found: %s (from %s, candidates: %s)", winner, from_url, urls)
142175
return winner
143176

177+
def _get_urls(self, identifier: str) -> tuple[list[str], str]:
178+
try:
179+
return self._primary_url_map[identifier], "primary"
180+
except KeyError:
181+
return self._secondary_url_map[identifier], "secondary"
182+
144183
def _get_item_url(
145184
self,
146185
identifier: str,
147186
fallback: Callable[[str], Sequence[str]] | None = None,
148187
from_url: str | None = None,
149188
) -> str:
150189
try:
151-
urls = self._url_map[identifier]
190+
urls, qualifier = self._get_urls(identifier)
152191
except KeyError:
153192
if identifier in self._abs_url_map:
154193
return self._abs_url_map[identifier]
@@ -157,16 +196,17 @@ def _get_item_url(
157196
for new_identifier in new_identifiers:
158197
with contextlib.suppress(KeyError):
159198
url = self._get_item_url(new_identifier)
160-
self._url_map[identifier] = [url]
199+
self._secondary_url_map[identifier] = [url]
161200
return url
162201
raise
163202

164203
if len(urls) > 1:
165204
if self.config.resolve_closest and from_url is not None:
166-
return self._get_closest_url(from_url, urls)
205+
return self._get_closest_url(from_url, urls, qualifier)
167206
log.warning(
168-
"Multiple URLs found for '%s': %s. "
207+
"Multiple %s URLs found for '%s': %s. "
169208
"Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
209+
qualifier,
170210
identifier,
171211
urls,
172212
)
@@ -252,13 +292,13 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa:
252292
def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
253293
"""Recurse on every anchor to map its ID to its absolute URL.
254294
255-
This method populates `self.url_map` by side-effect.
295+
This method populates `self._primary_url_map` by side-effect.
256296
257297
Arguments:
258298
base_url: The base URL to use as a prefix for each anchor's relative URL.
259299
anchor: The anchor to process and to recurse on.
260300
"""
261-
self.register_anchor(base_url, anchor.id)
301+
self.register_anchor(base_url, anchor.id, primary=True)
262302
for child in anchor.children:
263303
self.map_urls(base_url, child)
264304

src/mkdocs_autorefs/references.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -506,7 +506,7 @@ def append(self, anchor: str) -> None:
506506

507507
def flush(self, alias_to: str | None = None) -> None:
508508
for anchor in self.anchors:
509-
self.plugin.register_anchor(self.current_page, anchor, alias_to)
509+
self.plugin.register_anchor(self.current_page, anchor, alias_to, primary=True)
510510
self.anchors.clear()
511511

512512

tests/test_plugin.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
def test_url_registration() -> None:
1111
"""Check that URLs can be registered, then obtained."""
1212
plugin = AutorefsPlugin()
13-
plugin.register_anchor(identifier="foo", page="foo1.html")
13+
plugin.register_anchor(identifier="foo", page="foo1.html", primary=True)
1414
plugin.register_url(identifier="bar", url="https://example.org/bar.html")
1515

1616
assert plugin.get_item_url("foo") == "foo1.html#foo"
@@ -22,7 +22,7 @@ def test_url_registration() -> None:
2222
def test_url_registration_with_from_url() -> None:
2323
"""Check that URLs can be registered, then obtained, relative to a page."""
2424
plugin = AutorefsPlugin()
25-
plugin.register_anchor(identifier="foo", page="foo1.html")
25+
plugin.register_anchor(identifier="foo", page="foo1.html", primary=True)
2626
plugin.register_url(identifier="bar", url="https://example.org/bar.html")
2727

2828
assert plugin.get_item_url("foo", from_url="a/b.html") == "../foo1.html#foo"
@@ -34,7 +34,7 @@ def test_url_registration_with_from_url() -> None:
3434
def test_url_registration_with_fallback() -> None:
3535
"""Check that URLs can be registered, then obtained through a fallback."""
3636
plugin = AutorefsPlugin()
37-
plugin.register_anchor(identifier="foo", page="foo1.html")
37+
plugin.register_anchor(identifier="foo", page="foo1.html", primary=True)
3838
plugin.register_url(identifier="bar", url="https://example.org/bar.html")
3939

4040
# Secondary URL map will be updated with baz -> foo1.html#foo
@@ -53,7 +53,7 @@ def test_url_registration_with_fallback() -> None:
5353
def test_dont_make_relative_urls_relative_again() -> None:
5454
"""Check that URLs are not made relative more than once."""
5555
plugin = AutorefsPlugin()
56-
plugin.register_anchor(identifier="foo.bar.baz", page="foo/bar/baz.html")
56+
plugin.register_anchor(identifier="foo.bar.baz", page="foo/bar/baz.html", primary=True)
5757

5858
for _ in range(2):
5959
assert (
@@ -83,4 +83,11 @@ def test_dont_make_relative_urls_relative_again() -> None:
8383
)
8484
def test_find_closest_url(base: str, urls: list[str], expected: str) -> None:
8585
"""Find closest URLs given a list of URLs."""
86-
assert AutorefsPlugin._get_closest_url(base, urls) == expected
86+
assert AutorefsPlugin._get_closest_url(base, urls, "test") == expected
87+
88+
89+
def test_register_secondary_url() -> None:
90+
"""Test registering secondary URLs."""
91+
plugin = AutorefsPlugin()
92+
plugin.register_anchor(identifier="foo", page="foo.html", primary=False)
93+
assert plugin._secondary_url_map == {"foo": ["foo.html#foo"]}

tests/test_references.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ def test_register_markdown_anchors() -> None:
342342
""",
343343
),
344344
)
345-
assert plugin._url_map == {
345+
assert plugin._primary_url_map == {
346346
"foo": ["page#heading-foo"],
347347
"bar": ["page#bar"],
348348
"alias1": ["page#heading-bar"],
@@ -379,7 +379,7 @@ def test_register_markdown_anchors_with_admonition() -> None:
379379
""",
380380
),
381381
)
382-
assert plugin._url_map == {
382+
assert plugin._primary_url_map == {
383383
"alias1": ["page#alias1"],
384384
"alias2": ["page#heading-bar"],
385385
"alias3": ["page#alias3"],

0 commit comments

Comments
 (0)