diff --git a/CHANGELOG.md b/CHANGELOG.md index c2dd6795c..acd59572f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Upgraded `tar` to `^7.5.16`. [#1338](https://github.com/sourcebot-dev/sourcebot/pull/1338) - Upgraded `esbuild` to `^0.28.1`. [#1342](https://github.com/sourcebot-dev/sourcebot/pull/1342) - Enabled Next.js version skew protection to fix "Failed to load chunk" errors during rolling deploys. [#1346](https://github.com/sourcebot-dev/sourcebot/pull/1346) +- Split GitLab group sync into bounded direct-project and subgroup page fetches to avoid timeouts on large namespaces. [#1351](https://github.com/sourcebot-dev/sourcebot/pull/1351) ## [5.0.3] - 2026-06-17 diff --git a/packages/backend/src/gitlab.test.ts b/packages/backend/src/gitlab.test.ts index a502488df..7473084cd 100644 --- a/packages/backend/src/gitlab.test.ts +++ b/packages/backend/src/gitlab.test.ts @@ -1,6 +1,6 @@ -import { expect, test } from 'vitest'; -import { shouldExcludeProject } from './gitlab'; -import { ProjectSchema } from '@gitbeaker/rest'; +import { expect, test, vi } from 'vitest'; +import { getGitLabProjectsForGroupTree, shouldExcludeProject } from './gitlab'; +import { Gitlab, GroupSchema, ProjectSchema } from '@gitbeaker/rest'; test('shouldExcludeProject returns false when the project is not excluded.', () => { @@ -154,3 +154,129 @@ test('shouldExcludeProject include.topics matching is case-sensitive on the proj include: { topics: ['backend'] }, })).toBe(true); }); + +test('getGitLabProjectsForGroupTree walks paginated subgroup trees without includeSubgroups queries.', async () => { + const rootProjectPage1 = { + id: 1, + path_with_namespace: 'root/project-a', + } as ProjectSchema; + const rootProjectPage2 = { + id: 2, + path_with_namespace: 'root/project-b', + } as ProjectSchema; + const childProject = { + id: 3, + path_with_namespace: 'root/child/project-c', + } as ProjectSchema; + const grandchildProject = { + id: 4, + path_with_namespace: 'root/child/grandchild/project-d', + } as ProjectSchema; + + const projectsByGroupPage = new Map([ + ['root:1', [rootProjectPage1]], + ['root:2', [rootProjectPage2]], + ['root/child:1', [childProject]], + ['root/child/grandchild:1', [grandchildProject]], + ]); + + const subgroupsByGroupPage = new Map([ + ['root:1', [ + { + id: 10, + full_path: 'root/child', + } as GroupSchema, + ]], + ['root:2', [ + { + id: 11, + full_path: 'root/other-child', + } as GroupSchema, + ]], + ['root/child:1', [ + { + id: 12, + full_path: 'root/child/grandchild', + } as GroupSchema, + ]], + ]); + + const api = { + Groups: { + allProjects: vi.fn(async (group: string | number, options: { page: number }) => ({ + data: projectsByGroupPage.get(`${group}:${options.page}`) ?? [], + paginationInfo: { + next: group === 'root' && options.page === 1 ? 2 : null, + }, + })), + allSubgroups: vi.fn(async (group: string | number, options: { page: number }) => ({ + data: subgroupsByGroupPage.get(`${group}:${options.page}`) ?? [], + paginationInfo: { + next: group === 'root' && options.page === 1 ? 2 : null, + }, + })), + }, + } as unknown as InstanceType; + + const projects = await getGitLabProjectsForGroupTree(api, 'root'); + + expect(projects.map(project => project.path_with_namespace)).toEqual([ + 'root/project-a', + 'root/project-b', + 'root/child/project-c', + 'root/child/grandchild/project-d', + ]); + + expect(api.Groups.allProjects).toHaveBeenCalledWith('root', expect.objectContaining({ + page: 1, + perPage: 100, + pagination: 'offset', + showExpanded: true, + includeSubgroups: false, + })); + expect(api.Groups.allProjects).toHaveBeenCalledWith('root', expect.objectContaining({ + page: 2, + includeSubgroups: false, + })); + expect(api.Groups.allProjects).toHaveBeenCalledWith('root/child', expect.objectContaining({ + includeSubgroups: false, + })); + expect(api.Groups.allProjects).toHaveBeenCalledWith('root/child/grandchild', expect.objectContaining({ + includeSubgroups: false, + })); + expect(api.Groups.allProjects).toHaveBeenCalledWith('root/other-child', expect.objectContaining({ + includeSubgroups: false, + })); + expect(api.Groups.allProjects).not.toHaveBeenCalledWith(expect.anything(), expect.objectContaining({ + includeSubgroups: true, + })); +}); + +test('getGitLabProjectsForGroupTree stops when GitLab returns a non-advancing next page.', async () => { + const project = { + id: 1, + path_with_namespace: 'root/project-a', + } as ProjectSchema; + + const api = { + Groups: { + allProjects: vi.fn(async () => ({ + data: [project], + paginationInfo: { + next: 1, + }, + })), + allSubgroups: vi.fn(async () => ({ + data: [], + paginationInfo: { + next: null, + }, + })), + }, + } as unknown as InstanceType; + + const projects = await getGitLabProjectsForGroupTree(api, 'root'); + + expect(projects).toEqual([project]); + expect(api.Groups.allProjects).toHaveBeenCalledTimes(1); +}); diff --git a/packages/backend/src/gitlab.ts b/packages/backend/src/gitlab.ts index 94a6e0710..13de8f2c3 100644 --- a/packages/backend/src/gitlab.ts +++ b/packages/backend/src/gitlab.ts @@ -1,4 +1,4 @@ -import { Gitlab, ProjectSchema } from "@gitbeaker/rest"; +import { Gitlab, GroupSchema, ProjectSchema } from "@gitbeaker/rest"; import * as Sentry from "@sentry/node"; import { getTokenFromConfig } from "@sourcebot/shared"; import { createLogger } from "@sourcebot/shared"; @@ -10,6 +10,100 @@ import { fetchWithRetry, measure } from "./utils.js"; const logger = createLogger('gitlab'); export const GITLAB_CLOUD_HOSTNAME = "gitlab.com"; +const GITLAB_PAGE_SIZE = 100; + +type GitLabApi = InstanceType; + +type GitLabPaginatedResponse = { + data: T[]; + paginationInfo?: { + next: number | null; + }; +}; + +const fetchAllGitLabPages = async ( + identifier: string, + fetchPage: (page: number) => Promise>, +): Promise => { + const items: T[] = []; + let page = 1; + + while (true) { + const response = await fetchWithRetry( + () => fetchPage(page), + `${identifier} page ${page}`, + logger, + ); + + items.push(...response.data); + + const nextPage = response.paginationInfo?.next; + if (!nextPage) { + break; + } + if (nextPage <= page) { + logger.warn(`Stopping pagination for ${identifier}: GitLab returned non-advancing next page ${nextPage} after page ${page}.`); + break; + } + + page = nextPage; + } + + return items; +}; + +export const getGitLabProjectsForGroupTree = async ( + api: GitLabApi, + rootGroup: string, +): Promise => { + const projectsById = new Map(); + const groupsToVisit: Array = [rootGroup]; + let groupIndex = 0; + const visitedGroups = new Set(); + + while (groupIndex < groupsToVisit.length) { + const group = groupsToVisit[groupIndex++]!; + const groupKey = String(group); + if (visitedGroups.has(groupKey)) { + continue; + } + visitedGroups.add(groupKey); + + const [projects, subgroups] = await Promise.all([ + fetchAllGitLabPages( + `projects for GitLab group ${groupKey}`, + async (page) => api.Groups.allProjects(group, { + perPage: GITLAB_PAGE_SIZE, + page, + pagination: 'offset', + showExpanded: true, + includeSubgroups: false, + }) as Promise>, + ), + fetchAllGitLabPages( + `subgroups for GitLab group ${groupKey}`, + async (page) => api.Groups.allSubgroups(group, { + perPage: GITLAB_PAGE_SIZE, + page, + pagination: 'offset', + showExpanded: true, + }) as Promise>, + ), + ]); + + for (const project of projects) { + projectsById.set(project.id, project); + } + + for (const subgroup of subgroups) { + groupsToVisit.push(subgroup.full_path ?? subgroup.id); + } + } + + logger.debug(`Fetched ${projectsById.size} projects across ${visitedGroups.size} GitLab group(s) under ${rootGroup}.`); + + return [...projectsById.values()]; +}; export const createGitLabFromPersonalAccessToken = async ({ token, url }: { token?: string, url?: string }) => { const isGitLabCloud = url ? new URL(url).hostname === GITLAB_CLOUD_HOSTNAME : true; @@ -81,11 +175,7 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig) = try { logger.debug(`Fetching project info for group ${group}...`); const { durationMs, data } = await measure(async () => { - const fetchFn = () => api.Groups.allProjects(group, { - perPage: 100, - includeSubgroups: true - }); - return fetchWithRetry(fetchFn, `group ${group}`, logger); + return getGitLabProjectsForGroupTree(api, group); }); logger.debug(`Found ${data.length} projects in group ${group} in ${durationMs}ms.`); return { @@ -333,4 +423,4 @@ export const getOAuthScopesForAuthenticatedUser = async (api: InstanceType