@@ -16,7 +16,8 @@ import type { Context, Page } from '@/types'
// Link patterns for Markdown.
// All of these carry the `g` flag, so `lastIndex` persists between `exec`
// calls; reset it to 0 after each scan loop.

// Matches the `](/path)` tail of a link whose destination starts with `/`.
const INTERNAL_LINK_PATTERN = /\]\(\/[^)]+\)/g

// Matches `[AUTOTITLE](href)`; group 1 is the href (may still include `#anchor`).
const AUTOTITLE_LINK_PATTERN = /\[AUTOTITLE\]\(([^)]+)\)/g

// Matches the `](http://…)` / `](https://…)` tail of a link; group 1 is the URL.
// Handles one level of balanced parentheses in URLs (e.g., Wikipedia links).
//
// Each pass of the repeated group consumes either ONE non-paren, non-whitespace
// character or one whole `(...)` segment. The character class is deliberately
// not quantified with `+` inside the group: the `(?:x+|…)*` shape accepts the
// same strings, but when the closing `)` is missing (or the URL contains
// whitespace) a backtracking engine can split the run of characters in
// exponentially many ways before giving up.
const EXTERNAL_LINK_PATTERN = /\]\((https?:\/\/(?:[^()\s]|\([^()]*\))*)\)/g

// Matches `![alt](href)`; group 1 is the href.
const IMAGE_LINK_PATTERN = /!\[[^\]]*\]\(([^)]+)\)/g
2122
2223// Anchor link patterns (for same-page links)
@@ -82,10 +83,19 @@ export function extractLinksFromMarkdown(content: string): LinkExtractionResult
8283 const anchorLinks : ExtractedLink [ ] = [ ]
8384 const imageLinks : ExtractedLink [ ] = [ ]
8485
86+ // Strip fenced code blocks to avoid checking example/placeholder URLs
87+ // Replaces non-newline characters with spaces to preserve line numbers and positions
88+ const strippedContent = content . replace (
89+ / ^ { 0 , 3 } ( ` { 3 , } ) [ ^ \n ] * \n [ \s \S ] * ?^ { 0 , 3 } \1\s * $ / gm,
90+ ( match ) => {
91+ return match . replace ( / [ ^ \n ] / g, ' ' )
92+ } ,
93+ )
94+
8595 // Extract AUTOTITLE links first (they're a special case of internal links)
8696 let match
87- while ( ( match = AUTOTITLE_LINK_PATTERN . exec ( content ) ) !== null ) {
88- const { line, column } = getLineAndColumn ( content , match . index )
97+ while ( ( match = AUTOTITLE_LINK_PATTERN . exec ( strippedContent ) ) !== null ) {
98+ const { line, column } = getLineAndColumn ( strippedContent , match . index )
8999 const href = match [ 1 ] . split ( '#' ) [ 0 ] // Remove anchor if present
90100 if ( href . startsWith ( '/' ) ) {
91101 internalLinks . push ( {
@@ -102,17 +112,17 @@ export function extractLinksFromMarkdown(content: string): LinkExtractionResult
102112 AUTOTITLE_LINK_PATTERN . lastIndex = 0
103113
104114 // Extract regular internal links
105- while ( ( match = INTERNAL_LINK_PATTERN . exec ( content ) ) !== null ) {
115+ while ( ( match = INTERNAL_LINK_PATTERN . exec ( strippedContent ) ) !== null ) {
106116 // Skip if this is an AUTOTITLE link (already captured)
107117 const fullMatch = match [ 0 ]
108- if ( content . substring ( match . index - 10 , match . index ) . includes ( 'AUTOTITLE' ) ) {
118+ if ( strippedContent . substring ( match . index - 10 , match . index ) . includes ( 'AUTOTITLE' ) ) {
109119 continue
110120 }
111121
112- const { line, column } = getLineAndColumn ( content , match . index )
122+ const { line, column } = getLineAndColumn ( strippedContent , match . index )
113123 // Extract href from ](/path) format
114124 const href = fullMatch . substring ( 2 , fullMatch . length - 1 ) . split ( '#' ) [ 0 ]
115- const text = extractLinkText ( content , match . index )
125+ const text = extractLinkText ( strippedContent , match . index )
116126
117127 internalLinks . push ( {
118128 href,
@@ -127,10 +137,10 @@ export function extractLinksFromMarkdown(content: string): LinkExtractionResult
127137 INTERNAL_LINK_PATTERN . lastIndex = 0
128138
129139 // Extract external links
130- while ( ( match = EXTERNAL_LINK_PATTERN . exec ( content ) ) !== null ) {
131- const { line, column } = getLineAndColumn ( content , match . index )
140+ while ( ( match = EXTERNAL_LINK_PATTERN . exec ( strippedContent ) ) !== null ) {
141+ const { line, column } = getLineAndColumn ( strippedContent , match . index )
132142 const href = match [ 1 ]
133- const text = extractLinkText ( content , match . index )
143+ const text = extractLinkText ( strippedContent , match . index )
134144
135145 externalLinks . push ( {
136146 href,
@@ -144,8 +154,8 @@ export function extractLinksFromMarkdown(content: string): LinkExtractionResult
144154 EXTERNAL_LINK_PATTERN . lastIndex = 0
145155
146156 // Extract anchor links
147- while ( ( match = ANCHOR_LINK_PATTERN . exec ( content ) ) !== null ) {
148- const { line, column } = getLineAndColumn ( content , match . index )
157+ while ( ( match = ANCHOR_LINK_PATTERN . exec ( strippedContent ) ) !== null ) {
158+ const { line, column } = getLineAndColumn ( strippedContent , match . index )
149159 const href = match [ 0 ] . substring ( 2 , match [ 0 ] . length - 1 )
150160
151161 anchorLinks . push ( {
@@ -160,8 +170,8 @@ export function extractLinksFromMarkdown(content: string): LinkExtractionResult
160170 ANCHOR_LINK_PATTERN . lastIndex = 0
161171
162172 // Extract image links
163- while ( ( match = IMAGE_LINK_PATTERN . exec ( content ) ) !== null ) {
164- const { line, column } = getLineAndColumn ( content , match . index )
173+ while ( ( match = IMAGE_LINK_PATTERN . exec ( strippedContent ) ) !== null ) {
174+ const { line, column } = getLineAndColumn ( strippedContent , match . index )
165175 const href = match [ 1 ]
166176
167177 // Only include internal images (starting with /)
@@ -345,6 +355,19 @@ export function checkInternalLink(
345355 }
346356 }
347357
358+ // Strip language prefix and check redirects (which are stored without it)
359+ const langPrefixMatch = resolved . match ( / ^ \/ [ a - z ] { 2 } \/ / )
360+ if ( langPrefixMatch ) {
361+ const withoutLang = resolved . slice ( langPrefixMatch [ 0 ] . length - 1 )
362+ if ( redirects [ withoutLang ] ) {
363+ return {
364+ exists : true ,
365+ isRedirect : true ,
366+ redirectTarget : redirects [ withoutLang ] ,
367+ }
368+ }
369+ }
370+
348371 return { exists : false , isRedirect : false }
349372}
350373
0 commit comments