Article API: strip HTML spans from secret scanning patterns (#60077)

heiskr · Copilot · web-flow · commit 7ecd7d9bd720 · 2026-03-12T09:19:37.000-07:00
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/src/article-api/tests/secret-scanning-transformer.ts b/src/article-api/tests/secret-scanning-transformer.ts
@@ -20,6 +20,12 @@ describe('secret scanning article body api', () => {
     // Verify HTML comments are stripped
     expect(res.body).not.toMatch(/<!--.*?-->/)
 
+    // Verify HTML icon spans are not present (would be replaced with ✓/✗)
+    expect(res.body).not.toMatch(/<span[^>]*aria-label="Supported"/)
+    expect(res.body).not.toMatch(/<span[^>]*aria-label="Unsupported"/)
+    // Verify no raw HTML span tags remain
+    expect(res.body).not.toMatch(/<span[^>]*>/)
+
     // Verify table content is present with providers
     expect(res.body).toMatch(/|\s*Provider\s*|/)
     expect(res.body).toMatch(/\| (Adafruit|AWS|Alibaba|Amazon)/)
diff --git a/src/article-api/transformers/secret-scanning-transformer.ts b/src/article-api/transformers/secret-scanning-transformer.ts
@@ -69,6 +69,19 @@ export class SecretScanningTransformer implements PageTransformer {
     // Strip HTML comments from the rendered content
     content = content.replace(/<!--.*?-->/gs, '')
 
+    // Replace HTML icon spans with plain text equivalents
+    content = content.replace(/<span[^>]*aria-label="Supported"[^>]*>[^<]*<\/span>/g, '✓')
+    content = content.replace(/<span[^>]*aria-label="Unsupported"[^>]*>[^<]*<\/span>/g, '✗')
+    // Convert <br/> tags to newlines and <a href="...">text</a> to markdown links
+    content = content.replace(/<br\s*\/?>/gi, '\n')
+    content = content.replace(/<a\s+href="([^"]*)"[^>]*>([^<]*)<\/a>/gi, '[$2]($1)')
+    // Strip any remaining HTML tags (loop to handle nested/malformed tags)
+    let previous = ''
+    while (content !== previous) {
+      previous = content
+      content = content.replace(/<[^>]+>/g, '')
+    }
+
     // Normalize whitespace after stripping comments
     content = content.replace(/\n{3,}/g, '\n\n').trim()