Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile.cbm
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ PIPELINE_SRCS = \
src/pipeline/pass_similarity.c \
src/pipeline/pass_semantic_edges.c \
src/pipeline/pass_complexity.c \
src/pipeline/pass_importance.c \
src/pipeline/pass_cross_repo.c \
src/pipeline/artifact.c \
src/pipeline/pass_pkgmap.c
Expand Down Expand Up @@ -326,7 +327,7 @@ TEST_DISCOVER_SRCS = \

TEST_GRAPH_BUFFER_SRCS = tests/test_graph_buffer.c

TEST_PIPELINE_SRCS = tests/test_registry.c tests/test_pipeline.c tests/test_fqn.c tests/test_path_alias.c tests/test_configlink.c tests/test_infrascan.c tests/test_worker_pool.c tests/test_parallel.c
TEST_PIPELINE_SRCS = tests/test_registry.c tests/test_pipeline.c tests/test_fqn.c tests/test_path_alias.c tests/test_configlink.c tests/test_infrascan.c tests/test_worker_pool.c tests/test_parallel.c tests/test_importance.c

TEST_WATCHER_SRCS = tests/test_watcher.c

Expand Down
15 changes: 15 additions & 0 deletions src/foundation/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,21 @@ enum {
CBM_PERCENT = 100,
};

/* ── Per-file parse-size cap ──────────────────────────────────── */
/* Default per-file source-read cap (MB), env-overridable via CBM_MAX_FILE_MB
* (see cbm_max_file_bytes() in system_info.c, same clamp shape as
* cbm_default_worker_count()/CBM_WORKERS). 10 MB clears a hand-authored
* amalgamation source like sqlite3.c (~8 MB) while still bounding a single
* worker's per-file parse working set. Previously expressed 7x as the
* misused CBM_PERCENT (100) * CBM_SZ_1K * CBM_SZ_1K "100 MB" literal —
* collapsed to this single named constant; CBM_PERCENT itself is untouched
* and remains the real percentage constant for mem.c/vmem.c/cypher.c. */
enum {
    /* Smallest value accepted from the CBM_MAX_FILE_MB env override. */
    CBM_MIN_FILE_MB = 1,
    /* Cap applied when CBM_MAX_FILE_MB is not set to a usable value. */
    CBM_DEFAULT_MAX_FILE_MB = 10,
    /* Largest value accepted from the CBM_MAX_FILE_MB env override (1 GB). */
    CBM_MAX_FILE_MB_CAP = 1024,
};

/* ── Tree-sitter field name helper ───────────────────────────── */
/* Usage: ts_node_child_by_field_name(node, TS_FIELD("callee"))
* Expands to: ts_node_child_by_field_name(node, TS_FIELD("callee"))
Expand Down
93 changes: 92 additions & 1 deletion src/foundation/mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <mimalloc.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#ifdef _WIN32
#ifndef WIN32_LEAN_AND_MEAN
Expand All @@ -38,6 +39,17 @@ static atomic_int g_was_over; /* pressure hysteresis */

#define MB_DIVISOR ((size_t)(CBM_SZ_1K * CBM_SZ_1K))

/* ── Hard memory ceiling (enforcing — see mem.h) ─────────────────
*
* Fraction of cgroup-aware detected RAM (cbm_system_info().total_ram),
* always strictly above the advisory DEFAULT_RAM_FRACTION budget so a
* repo that trips backpressure but recovers never reaches the ceiling.
* Absolute floor protects a legit big repo on a small-RAM CI runner from
* spuriously aborting at a tiny fraction-derived value. */
/* Share of detected total RAM used as the default hard ceiling. */
#define CBM_MEM_CEILING_FRACTION (0.85)
/* Lower bound for the ceiling, in MB (2 GB). */
#define CBM_MEM_CEILING_FLOOR_MB ((size_t)2 * 1024)
/* Upper bound accepted from the env override, in MB (1 TB). */
#define CBM_MEM_CEILING_CAP_MB ((size_t)1024 * 1024)

/* ── OS fallback for RSS (ASan builds where MI_OVERRIDE=0) ──── */

static size_t os_rss(void) {
Expand Down Expand Up @@ -134,13 +146,41 @@ void cbm_mem_init(double ram_fraction) {
}

/*
 * Current resident set size of the process, in bytes.
 *
 * Linux: the OS-reported figure from os_rss() (/proc/self/statm) is the
 * PRIMARY source, not a last resort. mimalloc's _mi_prim_process_info()
 * (vendored/mimalloc/src/prim/unix/prim.c) only fills peak_rss on Linux
 * (getrusage ru_maxrss, a high-water mark); current_rss falls back to
 * mimalloc's own committed-page counter, which the allocator options set in
 * cbm_mem_init (arena_eager_commit=0, purge_decommits=1, purge_delay=0)
 * deliberately keep low. On a calibrated 40x8MB synthetic large-file index
 * mi_process_info reported current_rss=4MB while ps/proc and os_rss() both
 * showed ~2.2GB. That small-but-nonzero reading passed the
 * `current_rss > 0` guard and blinded both cbm_mem_over_budget backpressure
 * and the enforcing ceiling to real pressure during concurrent large-file
 * parsing (the mechanism diagnosed in the 2026-07-01 incident).
 *
 * Other platforms, or Linux when os_rss() yields 0 (e.g. /proc
 * unavailable): use mimalloc's current_rss when it is nonzero, otherwise
 * os_rss() (covers ASan builds with MI_OVERRIDE=0).
 */
size_t cbm_mem_rss(void) {
#if defined(__linux__) && !defined(_WIN32)
    const size_t os_reported = os_rss();
    if (os_reported != 0) {
        return os_reported;
    }
#endif
    size_t mi_current = 0;
    size_t mi_peak = 0;
    mi_process_info(NULL, NULL, NULL, &mi_current, &mi_peak, NULL, NULL, NULL);
    return mi_current > 0 ? mi_current : os_rss();
}

Expand Down Expand Up @@ -174,3 +214,54 @@ size_t cbm_mem_worker_budget(int num_workers) {
/* Ask mimalloc to collect now, passing `true` for its force argument. */
void cbm_mem_collect(void) {
    const bool force = true;
    mi_collect(force);
}

/* ── Hard memory ceiling (enforcing) ─────────────────────────────── */

/*
 * Hard-abort ceiling in bytes.
 *
 * Precedence:
 *   1. CBM_MEM_CEILING_MB, when it is a plain decimal integer within
 *      [CBM_MEM_CEILING_FLOOR_MB, CBM_MEM_CEILING_CAP_MB]. Surrounding
 *      whitespace is tolerated; anything else (blank, non-numeric, a
 *      leading '-', trailing characters such as "4096.5" or "4096abc",
 *      out of range) is rejected with a warning and falls through.
 *   2. max(CBM_MEM_CEILING_FRACTION * total_ram, CBM_MEM_CEILING_FLOOR_MB).
 *
 * Recomputed on every call; nothing is cached here.
 */
size_t cbm_mem_ceiling(void) {
    char buf[CBM_SZ_32];
    if (cbm_safe_getenv("CBM_MEM_CEILING_MB", buf, sizeof(buf), NULL) != NULL) {
        /* Skip leading blanks ourselves so the sign check below sees the
         * first significant character. */
        const char *first = buf;
        while (*first == ' ' || *first == '\t' || *first == '\n' || *first == '\r') {
            first++;
        }

        char *end = NULL;
        unsigned long long mb = strtoull(first, &end, CBM_DECIMAL_BASE);

        /* strtoull accepts "-N" and returns the wrapped unsigned value, so a
         * negative input could land inside the accepted range: reject it. */
        bool ok = (*first != '-') && (end != first);

        /* Only trailing whitespace may follow the digits; otherwise a value
         * like "4096.5" would silently be taken as 4096. */
        while (ok && (*end == ' ' || *end == '\t' || *end == '\n' || *end == '\r')) {
            end++;
        }
        ok = ok && (*end == '\0');

        if (ok && mb >= CBM_MEM_CEILING_FLOOR_MB && mb <= CBM_MEM_CEILING_CAP_MB) {
            /* Saturate instead of wrapping where size_t cannot hold the
             * product (the 1 TB cap exceeds a 32-bit size_t). */
            const size_t max_mb = (size_t)-1 / MB_DIVISOR;
            if (mb > max_mb) {
                return (size_t)-1;
            }
            return (size_t)mb * MB_DIVISOR;
        }
        cbm_log_warn("mem_ceiling.env.invalid", "value", buf, "fallback", "fraction");
    }

    cbm_system_info_t info = cbm_system_info();
    size_t fraction_bytes = (size_t)((double)info.total_ram * CBM_MEM_CEILING_FRACTION);
    size_t floor_bytes = CBM_MEM_CEILING_FLOOR_MB * MB_DIVISOR;
    return fraction_bytes > floor_bytes ? fraction_bytes : floor_bytes;
}

/* True when the current RSS is strictly greater than cbm_mem_ceiling(). */
bool cbm_mem_over_ceiling(void) {
    const size_t current = cbm_mem_rss();
    const size_t limit = cbm_mem_ceiling();
    return current > limit;
}

/*
 * Enforce the hard memory ceiling.
 *
 * Returns without side effects while RSS is at or below cbm_mem_ceiling().
 * Otherwise logs "mem.ceiling.abort" (file, phase, rss_mb, ceiling_mb) at
 * error level and calls abort().
 *
 * file, phase: labels for the diagnostic; NULL is logged as "unknown" /
 *              "n/a" respectively.
 *
 * The termination is deliberately not a graceful cancel:
 * cbm_pipeline_cancel() covers the cooperative path, and once RSS is past
 * the enforcing ceiling any further allocation needed to unwind cleanly is
 * itself the risk being guarded against. Must only be reached from the
 * in-memory extract/resolve phases, before any SQLite dump.
 */
void cbm_mem_abort_if_over_ceiling(const char *file, const char *phase) {
    const size_t current = cbm_mem_rss();
    const size_t limit = cbm_mem_ceiling();

    if (current > limit) {
        const char *file_label = file ? file : "unknown";
        const char *phase_label = phase ? phase : "n/a";

        char current_mb[CBM_SZ_32];
        char limit_mb[CBM_SZ_32];
        snprintf(current_mb, sizeof(current_mb), "%zu", current / MB_DIVISOR);
        snprintf(limit_mb, sizeof(limit_mb), "%zu", limit / MB_DIVISOR);

        cbm_log_error("mem.ceiling.abort", "file", file_label, "phase", phase_label, "rss_mb",
                      current_mb, "ceiling_mb", limit_mb);
        abort(); /* SIGABRT, default handler, non-zero exit */
    }
}
34 changes: 34 additions & 0 deletions src/foundation/mem.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,38 @@ size_t cbm_mem_worker_budget(int num_workers);
/* Return unused pages to the OS. Call between files to bound per-file peak. */
void cbm_mem_collect(void);

/* ── Hard memory ceiling (abort, not advisory) ───────────────────
*
* Distinct from cbm_mem_budget()/cbm_mem_over_budget() above, which are
* ADVISORY: pass_parallel.c backpressure naps and proceeds with a soft
* overshoot when workers can't get back under budget. The ceiling below is
* ENFORCING: cbm_mem_abort_if_over_ceiling() hard-aborts the process
* (abort(), SIGABRT) when exceeded, after emitting a diagnostic dump naming
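* the offending file, pipeline phase, and RSS. By default it is derived to
* sit above the advisory budget (see cbm_mem_ceiling), so a repo that trips
* the advisory backpressure but recovers never reaches the ceiling. A
* CBM_MEM_CEILING_MB override is only clamped to its own floor/cap and is
* not compared against the advisory budget.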
*
* Call cbm_mem_abort_if_over_ceiling() only from the in-memory
* extract/resolve phases, BEFORE the graph buffer is dumped to SQLite
* (pipeline.c: run_parallel_pipeline() precedes dump_and_persist_hashes()).
* An abort there can never leave a half-written store. */

/* Hard-abort ceiling in bytes: max(CBM_MEM_CEILING_FRACTION * total_ram,
* CBM_MEM_CEILING_FLOOR_MB), unless overridden by the CBM_MEM_CEILING_MB
* env var (same precedence/clamp shape as CBM_WORKERS / CBM_MAX_FILE_MB).
* Always computed fresh (env can change between calls in tests); cheap
* (one getenv + the cached cbm_system_info()). */
size_t cbm_mem_ceiling(void);

/* Returns true if current RSS exceeds cbm_mem_ceiling(). */
bool cbm_mem_over_ceiling(void);

/* If current RSS exceeds cbm_mem_ceiling(), log a diagnostic dump (offending
* file, phase, RSS, ceiling) at ERROR level to stderr and hard-abort the
* process via abort(). Returns (does nothing) otherwise. `file` and `phase`
* may be NULL (logged as "unknown"/"n/a") when the caller has no better
* label at the call site. Not signal-safe; call only from a normal worker
* context, never from a signal handler. */
void cbm_mem_abort_if_over_ceiling(const char *file, const char *phase);

#endif /* CBM_MEM_H */
11 changes: 11 additions & 0 deletions src/foundation/platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,17 @@ cbm_system_info_t cbm_system_info(void);
* initial=false: max(1, perf_cores-1) (leave headroom for user apps) */
int cbm_default_worker_count(bool initial);

/* Per-file source-read cap in bytes, for the read_file() size guard used by
* every extraction pass (pass_calls.c, pass_definitions.c, pass_semantic.c,
* pass_usages.c, pass_k8s.c, pass_parallel.c, pass_lsp_cross.c). A file
* larger than this is SKIPPED (read_file returns NULL), never aborted —
* distinct from the process-wide RSS ceiling in mem.h, which aborts.
* CBM_MAX_FILE_MB env override (clamped to [CBM_MIN_FILE_MB,
* CBM_MAX_FILE_MB_CAP]); default CBM_DEFAULT_MAX_FILE_MB. Same
* precedence/clamp shape as cbm_default_worker_count()/CBM_WORKERS: blank,
* unset, or non-numeric falls back to the default (never coerces to 0). */
long cbm_max_file_bytes(void);

/* ── Environment variables ──────────────────────────────────────── */

/* Thread-safe getenv: copies the value into a caller-provided buffer.
Expand Down
18 changes: 18 additions & 0 deletions src/foundation/system_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -304,3 +304,21 @@ int cbm_default_worker_count(bool initial) {
int workers = info.perf_cores - SKIP_ONE;
return workers > 0 ? workers : MIN_WORKERS;
}

/*
 * Per-file source-read cap, in bytes.
 *
 * CBM_MAX_FILE_MB is honoured when it is a plain decimal integer within
 * [CBM_MIN_FILE_MB, CBM_MAX_FILE_MB_CAP]; surrounding whitespace is
 * tolerated. Any other value present in the environment (blank,
 * non-numeric, out of range, or digits followed by other characters such as
 * "1e3" or "2.5") is rejected with a warning. When the variable is rejected
 * or not provided, the result is CBM_DEFAULT_MAX_FILE_MB.
 *
 * Trailing characters are rejected on purpose: with a floor of 1 MB, "1e3"
 * would otherwise parse as 1 and silently cap every file at 1 MB.
 */
long cbm_max_file_bytes(void) {
    const long bytes_per_mb = (long)CBM_SZ_1K * (long)CBM_SZ_1K;

    char buf[CBM_SZ_32];
    if (cbm_safe_getenv("CBM_MAX_FILE_MB", buf, sizeof(buf), NULL) != NULL) {
        char *end = NULL;
        long n = strtol(buf, &end, CBM_DECIMAL_BASE);

        /* end == buf means no digits were consumed (blank / non-numeric). */
        bool ok = (end != buf);

        /* Allow trailing whitespace only; the whole value must be a number. */
        while (ok && (*end == ' ' || *end == '\t' || *end == '\n' || *end == '\r')) {
            end++;
        }
        ok = ok && (*end == '\0');

        /* n <= CBM_MAX_FILE_MB_CAP (1024) keeps the product within 2^30,
         * which fits a 32-bit long. */
        if (ok && n >= CBM_MIN_FILE_MB && n <= CBM_MAX_FILE_MB_CAP) {
            return n * bytes_per_mb;
        }
        cbm_log_warn("max_file_mb.env.invalid", "value", buf, "fallback", "default");
    }
    return (long)CBM_DEFAULT_MAX_FILE_MB * bytes_per_mb;
}
6 changes: 3 additions & 3 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,9 +298,9 @@ static void print_help(void) {
printf(" Claude Code, Codex CLI, Gemini CLI, Zed, OpenCode,\n");
printf(" Antigravity, Aider, KiloCode, Kiro\n");
printf("\nTools: index_repository, search_graph, query_graph, trace_path,\n");
printf(" get_code_snippet, get_graph_schema, get_architecture, search_code,\n");
printf(" list_projects, delete_project, index_status, detect_changes,\n");
printf(" manage_adr, ingest_traces\n");
printf(" get_code_snippet, get_graph_schema, get_architecture, repo_map,\n");
printf(" search_code, list_projects, delete_project, index_status,\n");
printf(" detect_changes, manage_adr, ingest_traces\n");
}

/* ── Main ───────────────────────────────────────────────────────── */
Expand Down
Loading