Skip to content

Commit c490765

Browse files
authored
Consolidate search index failure notifications into single message (#59495)
1 parent 189cfab commit c490765

File tree

4 files changed

+378
-9
lines changed

4 files changed

+378
-9
lines changed

.github/workflows/index-general-search.yml

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -230,21 +230,64 @@ jobs:
230230
FASTLY_SURROGATE_KEY: api-search:${{ matrix.language }}
231231
run: npm run purge-fastly-edge-cache
232232

233-
- name: Alert on scraping failures
234-
if: ${{ steps.check-failures.outputs.has_failures == 'true' && github.event_name != 'workflow_dispatch' }}
235-
uses: ./.github/actions/slack-alert
233+
- name: Upload failures artifact
234+
if: ${{ steps.check-failures.outputs.has_failures == 'true' }}
235+
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
236+
with:
237+
name: search-failures-${{ matrix.language }}
238+
path: /tmp/records/failures-summary.json
239+
retention-days: 1
240+
241+
- uses: ./.github/actions/slack-alert
242+
if: ${{ failure() && github.event_name != 'workflow_dispatch' }}
236243
with:
237244
slack_channel_id: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
238245
slack_token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
239-
message: |
240-
:warning: ${{ steps.check-failures.outputs.failed_pages }} page(s) failed to scrape for general search indexing (language: ${{ matrix.language }})
241246

242-
The indexing completed but some pages could not be scraped. This may affect search results for those pages.
247+
notifyScrapingFailures:
248+
name: Notify scraping failures
249+
needs: updateElasticsearchIndexes
250+
if: ${{ always() && github.repository == 'github/docs-internal' && github.event_name != 'workflow_dispatch' && needs.updateElasticsearchIndexes.result != 'cancelled' }}
251+
runs-on: ubuntu-latest
252+
steps:
253+
- name: Check out repo
254+
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
255+
256+
- name: Download all failure artifacts
257+
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
258+
with:
259+
pattern: search-failures-*
260+
path: /tmp/failures
261+
continue-on-error: true
243262

244-
Workflow: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
263+
- name: Check if any failures were downloaded
264+
id: check-artifacts
265+
run: |
266+
if [ -d /tmp/failures ] && [ "$(ls -A /tmp/failures 2>/dev/null)" ]; then
267+
echo "has_artifacts=true" >> $GITHUB_OUTPUT
268+
else
269+
echo "has_artifacts=false" >> $GITHUB_OUTPUT
270+
fi
245271
246-
- uses: ./.github/actions/slack-alert
247-
if: ${{ failure() && github.event_name != 'workflow_dispatch' }}
272+
- uses: ./.github/actions/node-npm-setup
273+
if: ${{ steps.check-artifacts.outputs.has_artifacts == 'true' }}
274+
275+
- name: Aggregate failures and format message
276+
if: ${{ steps.check-artifacts.outputs.has_artifacts == 'true' }}
277+
id: aggregate
278+
run: |
279+
RESULT=$(npx tsx src/search/scripts/aggregate-search-index-failures.ts /tmp/failures \
280+
--workflow-url "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}")
281+
{
282+
echo 'result<<EOF'
283+
echo "$RESULT"
284+
echo 'EOF'
285+
} >> "$GITHUB_OUTPUT"
286+
287+
- name: Send consolidated Slack notification
288+
if: ${{ steps.check-artifacts.outputs.has_artifacts == 'true' }}
289+
uses: ./.github/actions/slack-alert
248290
with:
249291
slack_channel_id: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
250292
slack_token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
293+
message: ${{ fromJSON(steps.aggregate.outputs.result).message }}

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
"fixture-dev-debug": "cross-env NODE_ENV=development ROOT=src/fixtures/fixtures nodemon --inspect src/frame/server.ts",
4747
"fixture-test": "cross-env ROOT=src/fixtures/fixtures sh -c 'npm test -- ${1:-src/fixtures/tests}' --",
4848
"fr-add-docs-reviewers-requests": "tsx src/workflows/fr-add-docs-reviewers-requests.ts",
49+
"aggregate-search-index-failures": "tsx src/search/scripts/aggregate-search-index-failures.ts",
4950
"general-search-scrape": "tsx src/search/scripts/scrape/scrape-cli.ts",
5051
"general-search-scrape-server": "cross-env NODE_ENV=production PORT=4002 MINIMAL_RENDER=true CHANGELOG_DISABLED=true tsx src/frame/server.ts",
5152
"ghes-release-scrape-with-server": "cross-env GHES_RELEASE=1 start-server-and-test general-search-scrape-server 4002 general-search-scrape",
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
#!/usr/bin/env tsx
2+
/**
3+
* Aggregates search index failures from multiple language runs into a single
4+
* consolidated report. Groups failures by page path to show which versions
5+
* and languages failed for each page.
6+
*
7+
* Usage: tsx aggregate-search-index-failures.ts <artifacts-dir> [--workflow-url <url>]
8+
*
9+
* Reads failures-summary.json files from subdirectories and outputs a formatted
10+
* message suitable for Slack notifications.
11+
*/
12+
13+
import fs from 'fs'
14+
import path from 'path'
15+
16+
/** One page that failed to scrape, as recorded in a failures-summary.json artifact. */
interface Failure {
  // Full URL of the failed page, when the scraper recorded one.
  url?: string
  // Repo-relative path of the page, when available; preferred over `url` as a grouping key.
  relativePath?: string
  // Human-readable error description.
  error: string
  // Machine-readable error category.
  errorType: string
}

/** All failures for a single index (one language/version combination). */
interface LanguageFailures {
  indexName: string
  languageCode: string
  indexVersion: string
  failures: Failure[]
}

/** Top-level shape of one failures-summary.json file produced by a language run. */
export interface FailuresSummary {
  totalFailedPages: number
  failures: LanguageFailures[]
}

/** Per-page aggregation: every version and language in which that page failed. */
interface PageFailure {
  versions: Set<string>
  languages: Set<string>
}

/** Outcome of aggregation, consumed by the workflow's Slack-notification step. */
export interface AggregationResult {
  hasFailures: boolean
  // Slack-ready message; empty string when hasFailures is false.
  message: string
  // Count of unique failed pages; omitted when there is nothing to report.
  totalCount?: number
}
45+
46+
/**
47+
* Aggregates failures from multiple summaries into a single report.
48+
* Groups failures by page path to show which versions and languages failed for each.
49+
*/
50+
export function aggregateFailures(
51+
allFailures: FailuresSummary[],
52+
workflowUrl?: string,
53+
): AggregationResult {
54+
if (allFailures.length === 0) {
55+
return { hasFailures: false, message: '' }
56+
}
57+
58+
// Group failures by page path
59+
const pageFailures = new Map<string, PageFailure>()
60+
61+
for (const summary of allFailures) {
62+
for (const langFailures of summary.failures) {
63+
for (const failure of langFailures.failures) {
64+
const pagePath = failure.relativePath || failure.url || 'unknown'
65+
66+
if (!pageFailures.has(pagePath)) {
67+
pageFailures.set(pagePath, {
68+
versions: new Set(),
69+
languages: new Set(),
70+
})
71+
}
72+
73+
const pageData = pageFailures.get(pagePath)!
74+
pageData.versions.add(langFailures.indexVersion)
75+
pageData.languages.add(langFailures.languageCode)
76+
}
77+
}
78+
}
79+
80+
// Use unique page count, not total failure instances
81+
const uniquePageCount = pageFailures.size
82+
83+
// Format the message
84+
const lines: string[] = [
85+
`:warning: ${uniquePageCount} page(s) failed to scrape for general search indexing`,
86+
'',
87+
'The indexing completed but some pages could not be scraped. This may affect search results for those pages.',
88+
'',
89+
]
90+
91+
// Sort pages alphabetically and format each
92+
const sortedPages = Array.from(pageFailures.entries()).sort((a, b) => a[0].localeCompare(b[0]))
93+
94+
for (const [pagePath, data] of sortedPages) {
95+
const versions = Array.from(data.versions).sort().join(', ')
96+
const languages = Array.from(data.languages).sort().join(', ')
97+
lines.push(`• \`${pagePath}\` (versions: ${versions}, languages: ${languages})`)
98+
}
99+
100+
if (workflowUrl) {
101+
lines.push('')
102+
lines.push(`Workflow: ${workflowUrl}`)
103+
}
104+
105+
const message = lines.join('\n')
106+
107+
return { hasFailures: true, message, totalCount: uniquePageCount }
108+
}
109+
110+
/**
111+
* Reads failure summaries from artifact directories.
112+
*/
113+
export function readFailureSummaries(artifactsDir: string): FailuresSummary[] {
114+
const allFailures: FailuresSummary[] = []
115+
const subdirs = fs.readdirSync(artifactsDir, { withFileTypes: true })
116+
117+
for (const subdir of subdirs) {
118+
if (!subdir.isDirectory()) continue
119+
120+
const summaryPath = path.join(artifactsDir, subdir.name, 'failures-summary.json')
121+
if (fs.existsSync(summaryPath)) {
122+
const content = fs.readFileSync(summaryPath, 'utf-8')
123+
try {
124+
allFailures.push(JSON.parse(content) as FailuresSummary)
125+
} catch (error) {
126+
const message = error instanceof Error ? error.message : String(error)
127+
console.warn(`Warning: Failed to parse JSON in ${summaryPath}: ${message}`)
128+
}
129+
}
130+
}
131+
132+
return allFailures
133+
}
134+
135+
function main() {
136+
const args = process.argv.slice(2)
137+
const artifactsDir = args[0]
138+
const workflowUrlIndex = args.indexOf('--workflow-url')
139+
const workflowUrl = workflowUrlIndex !== -1 ? args[workflowUrlIndex + 1] : undefined
140+
141+
if (!artifactsDir) {
142+
console.error(
143+
'Usage: tsx aggregate-search-index-failures.ts <artifacts-dir> [--workflow-url <url>]',
144+
)
145+
process.exit(1)
146+
}
147+
148+
const allFailures = readFailureSummaries(artifactsDir)
149+
const result = aggregateFailures(allFailures, workflowUrl)
150+
console.log(JSON.stringify(result))
151+
}
152+
153+
// Only run main when executed directly (not imported)
154+
if (import.meta.url === `file://${process.argv[1]}`) {
155+
main()
156+
}

0 commit comments

Comments
 (0)