Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile.cbm
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ GRAPH_BUFFER_SRCS = src/graph_buffer/graph_buffer.c
# Pipeline module (new)
PIPELINE_SRCS = \
src/pipeline/fqn.c \
src/pipeline/project_resolve.c \
src/pipeline/path_alias.c \
src/pipeline/registry.c \
src/pipeline/pipeline.c \
Expand Down Expand Up @@ -332,7 +333,7 @@ TEST_DISCOVER_SRCS = \

TEST_GRAPH_BUFFER_SRCS = tests/test_graph_buffer.c

TEST_PIPELINE_SRCS = tests/test_registry.c tests/test_pipeline.c tests/test_fqn.c tests/test_route_canon.c tests/test_path_alias.c tests/test_configlink.c tests/test_infrascan.c tests/test_worker_pool.c tests/test_parallel.c
TEST_PIPELINE_SRCS = tests/test_registry.c tests/test_pipeline.c tests/test_fqn.c tests/test_route_canon.c tests/test_path_alias.c tests/test_configlink.c tests/test_infrascan.c tests/test_worker_pool.c tests/test_parallel.c tests/test_project_resolve.c

TEST_WATCHER_SRCS = tests/test_watcher.c

Expand Down
4 changes: 4 additions & 0 deletions src/cli/cli.c
Original file line number Diff line number Diff line change
Expand Up @@ -2625,6 +2625,8 @@ int cbm_cmd_config(int argc, char **argv) {
"Enable auto-indexing on MCP session start");
printf(" %-25s default=%-10s %s\n", CBM_CONFIG_AUTO_INDEX_LIMIT, "50000",
"Max files for auto-indexing new projects");
printf(" %-25s default=%-10s %s\n", CBM_CONFIG_AUTO_WATCH, "false",
"Enable git watcher background re-indexing (off by default)");
return 0;
}

Expand All @@ -2650,6 +2652,8 @@ int cbm_cmd_config(int argc, char **argv) {
cbm_config_get(cfg, CBM_CONFIG_AUTO_INDEX, "false"));
printf(" %-25s = %-10s\n", CBM_CONFIG_AUTO_INDEX_LIMIT,
cbm_config_get(cfg, CBM_CONFIG_AUTO_INDEX_LIMIT, "50000"));
printf(" %-25s = %-10s\n", CBM_CONFIG_AUTO_WATCH,
cbm_config_get(cfg, CBM_CONFIG_AUTO_WATCH, "false"));
} else if (strcmp(argv[0], "get") == 0) {
if (argc < MIN_ARGC_GET) {
(void)fprintf(stderr, "Usage: config get <key>\n");
Expand Down
1 change: 1 addition & 0 deletions src/cli/cli.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ int cbm_config_delete(cbm_config_t *cfg, const char *key);
/* Well-known config keys */
#define CBM_CONFIG_AUTO_INDEX "auto_index"
#define CBM_CONFIG_AUTO_INDEX_LIMIT "auto_index_limit"
#define CBM_CONFIG_AUTO_WATCH "auto_watch"

/* ── Subcommands (wired from main.c) ─────────────────────────── */

Expand Down
52 changes: 40 additions & 12 deletions src/mcp/mcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ enum {
#include <sqlite3.h>
#include "cypher/cypher.h"
#include "pipeline/pipeline.h"
#include "pipeline/project_resolve.h"
#include "pipeline/pass_cross_repo.h"
#include "git/git_context.h"
#include "cli/cli.h"
Expand Down Expand Up @@ -2781,6 +2782,9 @@ static bool build_index_success_response(cbm_mcp_server_t *srv, yyjson_mut_doc *
return degraded;
}

static bool auto_watch_enabled(cbm_mcp_server_t *srv);
static void register_watcher_if_enabled(cbm_mcp_server_t *srv);

static char *handle_index_repository(cbm_mcp_server_t *srv, const char *args) {
char *repo_path = cbm_mcp_get_string_arg(args, "repo_path");
char *mode_str = cbm_mcp_get_string_arg(args, "mode");
Expand Down Expand Up @@ -2864,6 +2868,9 @@ static char *handle_index_repository(cbm_mcp_server_t *srv, const char *args) {
bool degraded = build_index_success_response(srv, doc, root, project_name, repo_path,
persistence, p, excluded_dirs, excluded_count);
yyjson_mut_obj_add_str(doc, root, "status", degraded ? "degraded" : "indexed");
if (srv->watcher && auto_watch_enabled(srv)) {
cbm_watcher_watch(srv->watcher, project_name, repo_path);
}
} else {
yyjson_mut_obj_add_str(doc, root, "status", "error");
yyjson_mut_obj_add_str(doc, root, "hint",
Expand Down Expand Up @@ -4568,15 +4575,41 @@ static void detect_session(cbm_mcp_server_t *srv) {
* used by the pipeline, otherwise session queries look for a .db file
* that doesn't match the indexed project name. */
if (srv->session_root[0]) {
char *pname = cbm_project_name_from_path(srv->session_root);
if (pname) {
snprintf(srv->session_project, sizeof(srv->session_project), "%s", pname);
free(pname);
char *existing = cbm_find_existing_project_name(srv->session_root);
if (existing) {
snprintf(srv->session_project, sizeof(srv->session_project), "%s", existing);
cbm_log_info("session.project.reuse", "project", existing, "path", srv->session_root);
free(existing);
} else {
char *pname = cbm_project_name_from_path(srv->session_root);
if (pname) {
snprintf(srv->session_project, sizeof(srv->session_project), "%s", pname);
free(pname);
}
}
}
}

/* Tell whether the auto_watch config key is switched on.
 * A missing server or missing config counts as "off". */
static bool auto_watch_enabled(cbm_mcp_server_t *srv) {
    if (srv && srv->config) {
        return cbm_config_get_bool(srv->config, CBM_CONFIG_AUTO_WATCH, false);
    }
    return false;
}

/* Hand the current session project to the watcher, but only when a watcher
 * exists, the session project/root are both known, and auto_watch is on.
 * When auto_watch is off, log a hint explaining how to turn it on. */
static void register_watcher_if_enabled(cbm_mcp_server_t *srv) {
    if (!srv || !srv->watcher) {
        return;
    }
    if (srv->session_project[0] == '\0' || srv->session_root[0] == '\0') {
        return;
    }
    if (auto_watch_enabled(srv)) {
        cbm_watcher_watch(srv->watcher, srv->session_project, srv->session_root);
        return;
    }
    cbm_log_info("watcher.skip", "reason", "auto_watch_disabled", "hint",
                 "run: codebase-memory-mcp config set auto_watch true");
}

/* Background auto-index thread function */
static void *autoindex_thread(void *arg) {
cbm_mcp_server_t *srv = (cbm_mcp_server_t *)arg;

Expand All @@ -4598,10 +4631,7 @@ static void *autoindex_thread(void *arg) {

if (rc == 0) {
cbm_log_info("autoindex.done", "project", srv->session_project);
/* Register with watcher for ongoing change detection */
if (srv->watcher) {
cbm_watcher_watch(srv->watcher, srv->session_project, srv->session_root);
}
register_watcher_if_enabled(srv);
} else {
cbm_log_warn("autoindex.err", "msg", "pipeline_run_failed");
}
Expand All @@ -4621,12 +4651,10 @@ static void maybe_auto_index(cbm_mcp_server_t *srv) {
snprintf(db_check, sizeof(db_check), "%s/%s.db", cbm_resolve_cache_dir(),
srv->session_project);
if (cbm_file_size(db_check) >= 0) {
/* Already indexed → register watcher for change detection */
/* Already indexed — use existing graph; never auto re-index on connect. */
cbm_log_info("autoindex.skip", "reason", "already_indexed", "project",
srv->session_project);
if (srv->watcher) {
cbm_watcher_watch(srv->watcher, srv->session_project, srv->session_root);
}
register_watcher_if_enabled(srv);
return;
}
}
Expand Down
4 changes: 3 additions & 1 deletion src/pipeline/pipeline.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
enum { CBM_DIR_PERMS = 0755, PL_RING = 4, PL_RING_MASK = 3, PL_SEQ_PASSES = 6, PL_WAL_BUF = 1040 };
#define PL_NSEC_PER_SEC 1000000000LL
#include "pipeline/pipeline.h"
#include "pipeline/project_resolve.h"
#include "pipeline/artifact.h"
#include "pipeline/pipeline_internal.h"
#include "pipeline/pass_lsp_cross.h"
Expand Down Expand Up @@ -153,7 +154,8 @@ cbm_pipeline_t *cbm_pipeline_new(const char *repo_path, const char *db_path,

p->repo_path = strdup(repo_path);
p->db_path = db_path ? strdup(db_path) : NULL;
p->project_name = cbm_project_name_from_path(repo_path);
char *existing = cbm_find_existing_project_name(repo_path);
p->project_name = existing ? existing : cbm_project_name_from_path(repo_path);
(void)cbm_git_context_resolve(repo_path, &p->git_ctx);
p->branch_qn = cbm_git_context_branch_qn(p->project_name, &p->git_ctx);
p->mode = mode;
Expand Down
148 changes: 148 additions & 0 deletions src/pipeline/project_resolve.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/*
* project_resolve.c — Canonical path identity and duplicate-index prevention.
*/
#include "pipeline/project_resolve.h"
#include "pipeline/pipeline.h"
#include "foundation/platform.h"
#include "foundation/compat_fs.h"
#include "git/git_context.h"
#include "store/store.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Resolve `path` to an absolute canonical form and store it in `out`.
 *
 * path    Path to resolve; must not be NULL.
 * out     Destination buffer; receives a NUL-terminated string on success and
 *         is left as an empty string on failure (POSIX branch).
 * out_sz  Capacity of `out` in bytes, including the terminator.
 *
 * Returns true only when a non-empty canonical path was written to `out`.
 * Returns false for NULL/zero-sized arguments, when the platform call fails,
 * or when the resolved path does not fit in `out_sz` bytes.
 */
bool cbm_path_canonicalize(const char *path, char *out, size_t out_sz) {
    if (!path || !out || out_sz == 0) {
        return false;
    }
    out[0] = '\0';
#ifdef _WIN32
    /* _fullpath honours the size argument, so it can write into `out` directly. */
    if (!_fullpath(out, path, out_sz)) {
        return false;
    }
    cbm_normalize_path_sep(out);
#else
    /* realpath() with a caller-supplied buffer has no size parameter and may
     * write up to PATH_MAX bytes. Let it allocate instead, then copy only if
     * the result fits in the caller's buffer. */
    char *resolved = realpath(path, NULL);
    if (!resolved) {
        return false;
    }
    size_t resolved_len = strlen(resolved);
    bool fits = resolved_len < out_sz;
    if (fits) {
        memcpy(out, resolved, resolved_len + 1);
    }
    free(resolved);
    if (!fits) {
        return false;
    }
#endif
    return out[0] != '\0';
}

/*
 * Produce the identity key used to decide whether two paths refer to the
 * same project.
 *
 * When git context resolution succeeds and reports a non-empty canonical
 * root, that root (with separators normalized) is the key. Otherwise the
 * key is the canonicalized form of `repo_path` itself.
 *
 * Returns false for NULL/zero-sized arguments or when the fallback
 * canonicalization fails.
 */
bool cbm_project_identity_key(const char *repo_path, char *out, size_t out_sz) {
    if (!repo_path || !out || out_sz == 0) {
        return false;
    }

    cbm_git_context_t git = {0};
    bool have_git_root = false;

    if (cbm_git_context_resolve(repo_path, &git) == 0) {
        const char *root = git.canonical_root;
        if (root && root[0]) {
            /* Copy before releasing the context that owns the string. */
            snprintf(out, out_sz, "%s", root);
            have_git_root = true;
        }
    }
    cbm_git_context_free(&git);

    if (!have_git_root) {
        return cbm_path_canonicalize(repo_path, out, out_sz);
    }
    cbm_normalize_path_sep(out);
    return true;
}

/*
 * Report whether `child` is the same path as `parent` or lies underneath it.
 *
 * Both arguments must be non-NULL, '/'-separated paths. The match is made on
 * whole path components, so "/ab" is not nested under "/a". A parent that
 * already ends in '/' (the filesystem root "/", or a drive root such as
 * "C:/") is handled too: every path sharing that prefix is nested under it.
 *
 * Returns false when either string is empty.
 */
static bool identity_nested(const char *child, const char *parent) {
    if (!child[0] || !parent[0]) {
        return false;
    }
    size_t plen = strlen(parent);
    if (strncmp(child, parent, plen) != 0) {
        return false;
    }
    if (child[plen] == '\0') {
        return true; /* identical paths */
    }
    if (parent[plen - 1] == '/') {
        /* The separator is part of the matched prefix; whatever follows is
         * already a descendant component. */
        return true;
    }
    return child[plen] == '/';
}

/*
 * Decide whether a cache-directory entry names a project database.
 *
 * name  Entry name (no directory part); must not be NULL.
 * len   strlen(name), passed in so the caller can reuse it.
 *
 * A project database is "<stem>.db" with a non-empty stem that does not
 * start with '_' (such names are skipped). The shortest accepted name is
 * therefore four characters, e.g. "a.db".
 */
static bool is_project_db_file(const char *name, size_t len) {
    static const char suffix[] = ".db";
    const size_t suffix_len = sizeof(suffix) - 1;

    if (len <= suffix_len) {
        return false; /* no room for a stem before the suffix */
    }
    if (strcmp(name + len - suffix_len, suffix) != 0) {
        return false;
    }
    return name[0] != '_';
}

/* Free the heap strings of a project row fetched from a store. */
static void release_project_fields(cbm_project_t *proj) {
    safe_str_free(&proj->name);
    safe_str_free(&proj->indexed_at);
    safe_str_free(&proj->root_path);
}

/*
 * Look through the cache directory for an already-indexed project whose root
 * has the same identity key as `repo_path`, or whose root and `repo_path` are
 * nested one inside the other.
 *
 * Each "<name>.db" entry is opened, its project row for <name> is read, and
 * the identity key of the stored root_path is compared with the key of
 * `repo_path`. When several entries match, the one with the longest indexed
 * root key wins (first seen wins on ties).
 *
 * NOTE(review): the match is symmetric, so a query that is a *parent* of an
 * indexed root also reuses that index's name. The header comment only
 * mentions "nested under an indexed root" -- confirm this is intended.
 *
 * Returns a heap-allocated project name the caller must free(), or NULL when
 * nothing matches, the cache directory is unavailable, or `repo_path` has no
 * identity key.
 */
char *cbm_find_existing_project_name(const char *repo_path) {
    if (!repo_path || !repo_path[0]) {
        return NULL;
    }

    char query_key[4096];
    if (!cbm_project_identity_key(repo_path, query_key, sizeof(query_key))) {
        return NULL;
    }

    const char *resolved_cache = cbm_resolve_cache_dir();
    if (!resolved_cache || !resolved_cache[0]) {
        return NULL;
    }

    /* Keep a private copy of the cache dir for the whole scan; refuse to scan
     * a silently truncated path. */
    char cache_dir[1024];
    int cache_len = snprintf(cache_dir, sizeof(cache_dir), "%s", resolved_cache);
    if (cache_len < 0 || (size_t)cache_len >= sizeof(cache_dir)) {
        return NULL;
    }

    cbm_dir_t *d = cbm_opendir(cache_dir);
    if (!d) {
        return NULL;
    }

    char *best_name = NULL;
    size_t best_root_len = 0;

    cbm_dirent_t *entry;
    while ((entry = cbm_readdir(d)) != NULL) {
        const char *name = entry->name;
        size_t len = strlen(name);
        if (!is_project_db_file(name, len)) {
            continue;
        }

        char db_path[2048];
        int path_len = snprintf(db_path, sizeof(db_path), "%s/%s", cache_dir, name);
        if (path_len < 0 || (size_t)path_len >= sizeof(db_path)) {
            continue; /* truncated path would point at the wrong file */
        }

        /* Project name is the file name without its ".db" suffix. */
        char project_name[1024];
        size_t stem_len = len - 3;
        if (stem_len >= sizeof(project_name)) {
            continue;
        }
        memcpy(project_name, name, stem_len);
        project_name[stem_len] = '\0';

        cbm_store_t *store = cbm_store_open_path(db_path);
        if (!store) {
            continue;
        }

        cbm_project_t proj = {0};
        char indexed_key[4096];
        bool has_key = false;
        if (cbm_store_get_project(store, project_name, &proj) == CBM_STORE_OK && proj.root_path) {
            has_key = cbm_project_identity_key(proj.root_path, indexed_key, sizeof(indexed_key));
        }
        release_project_fields(&proj);
        cbm_store_close(store);

        if (!has_key) {
            continue;
        }

        bool related = strcmp(query_key, indexed_key) == 0 ||
                       identity_nested(query_key, indexed_key) ||
                       identity_nested(indexed_key, query_key);
        if (!related) {
            continue;
        }

        size_t root_len = strlen(indexed_key);
        if (best_name && root_len <= best_root_len) {
            continue;
        }

        /* Duplicate first so an allocation failure keeps the previous best. */
        char *candidate = strdup(project_name);
        if (!candidate) {
            continue;
        }
        free(best_name);
        best_name = candidate;
        best_root_len = root_len;
    }

    cbm_closedir(d);
    return best_name;
}
17 changes: 17 additions & 0 deletions src/pipeline/project_resolve.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef CBM_PROJECT_RESOLVE_H
#define CBM_PROJECT_RESOLVE_H

#include <stdbool.h>
#include <stddef.h>

/* Canonicalize path (realpath on POSIX, _fullpath plus separator normalization
 * on Windows) into out, a buffer of out_sz bytes. Returns true only when a
 * non-empty canonical path was produced; false for NULL/zero-sized arguments
 * or when the path cannot be resolved. */
bool cbm_path_canonicalize(const char *path, char *out, size_t out_sz);

/* Stable identity for dedup, written to out (out_sz bytes): the git
 * canonical_root (separators normalized) when git context resolution succeeds
 * and yields a non-empty root, otherwise the canonicalized repo_path.
 * Returns false for NULL/zero-sized arguments or when canonicalization fails. */
bool cbm_project_identity_key(const char *repo_path, char *out, size_t out_sz);

/* Scan the cache directory's "<name>.db" files and return the heap-allocated
 * name of an existing project whose indexed root has the same identity key as
 * repo_path, or where one of the two keys is nested under the other (either
 * direction). When several indexes match, the one with the longest indexed
 * root key is chosen. Caller frees; NULL if no match, if repo_path is
 * NULL/empty, or if the cache directory cannot be opened. */
char *cbm_find_existing_project_name(const char *repo_path);

#endif
3 changes: 3 additions & 0 deletions tests/test_cli.c
Original file line number Diff line number Diff line change
Expand Up @@ -2493,6 +2493,9 @@ TEST(cli_config_get_bool) {
ASSERT_FALSE(cbm_config_get_bool(cfg, "auto_index", false));
ASSERT_TRUE(cbm_config_get_bool(cfg, "auto_index", true));

ASSERT_FALSE(cbm_config_get_bool(cfg, CBM_CONFIG_AUTO_WATCH, false));
ASSERT_TRUE(cbm_config_get_bool(cfg, CBM_CONFIG_AUTO_WATCH, true));

/* true variants */
cbm_config_set(cfg, "k1", "true");
ASSERT_TRUE(cbm_config_get_bool(cfg, "k1", false));
Expand Down
Loading