diff --git a/Makefile.cbm b/Makefile.cbm
index 2bcf7b4d7..261ce1766 100644
--- a/Makefile.cbm
+++ b/Makefile.cbm
@@ -175,6 +175,7 @@ GRAPH_BUFFER_SRCS = src/graph_buffer/graph_buffer.c
 # Pipeline module (new)
 PIPELINE_SRCS = \
 	src/pipeline/fqn.c \
+	src/pipeline/project_resolve.c \
 	src/pipeline/path_alias.c \
 	src/pipeline/registry.c \
 	src/pipeline/pipeline.c \
@@ -332,7 +333,7 @@ TEST_DISCOVER_SRCS = \
 
 TEST_GRAPH_BUFFER_SRCS = tests/test_graph_buffer.c
 
-TEST_PIPELINE_SRCS = tests/test_registry.c tests/test_pipeline.c tests/test_fqn.c tests/test_route_canon.c tests/test_path_alias.c tests/test_configlink.c tests/test_infrascan.c tests/test_worker_pool.c tests/test_parallel.c
+TEST_PIPELINE_SRCS = tests/test_registry.c tests/test_pipeline.c tests/test_fqn.c tests/test_route_canon.c tests/test_path_alias.c tests/test_configlink.c tests/test_infrascan.c tests/test_worker_pool.c tests/test_parallel.c tests/test_project_resolve.c
 
 TEST_WATCHER_SRCS = tests/test_watcher.c
 
diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c
index 7016a0d21..3ac34c4ef 100644
--- a/src/mcp/mcp.c
+++ b/src/mcp/mcp.c
@@ -42,6 +42,7 @@ enum {
 #include
 #include "cypher/cypher.h"
 #include "pipeline/pipeline.h"
+#include "pipeline/project_resolve.h"
 #include "pipeline/pass_cross_repo.h"
 #include "git/git_context.h"
 #include "cli/cli.h"
@@ -4568,10 +4569,19 @@ static void detect_session(cbm_mcp_server_t *srv) {
      * used by the pipeline, otherwise session queries look for a .db file
      * that doesn't match the indexed project name. */
     if (srv->session_root[0]) {
-        char *pname = cbm_project_name_from_path(srv->session_root);
-        if (pname) {
-            snprintf(srv->session_project, sizeof(srv->session_project), "%s", pname);
-            free(pname);
+        /* Prefer the name of an index that already covers this root; derive a
+         * fresh name from the path only when none exists. */
+        char *existing = cbm_find_existing_project_name(srv->session_root);
+        if (existing) {
+            snprintf(srv->session_project, sizeof(srv->session_project), "%s", existing);
+            cbm_log_info("session.project.reuse", "project", existing, "path", srv->session_root);
+            free(existing);
+        } else {
+            char *pname = cbm_project_name_from_path(srv->session_root);
+            if (pname) {
+                snprintf(srv->session_project, sizeof(srv->session_project), "%s", pname);
+                free(pname);
+            }
         }
     }
 }
diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c
index 8e370f7c3..554a0e3e1 100644
--- a/src/pipeline/pipeline.c
+++ b/src/pipeline/pipeline.c
@@ -15,6 +15,7 @@ enum { CBM_DIR_PERMS = 0755, PL_RING = 4, PL_RING_MASK = 3, PL_SEQ_PASSES = 6, PL_WAL_BUF = 1040 };
 #define PL_NSEC_PER_SEC 1000000000LL
 
 #include "pipeline/pipeline.h"
+#include "pipeline/project_resolve.h"
 #include "pipeline/artifact.h"
 #include "pipeline/pipeline_internal.h"
 #include "pipeline/pass_lsp_cross.h"
@@ -153,7 +154,10 @@ cbm_pipeline_t *cbm_pipeline_new(const char *repo_path, const char *db_path,
 
     p->repo_path = strdup(repo_path);
     p->db_path = db_path ? strdup(db_path) : NULL;
-    p->project_name = cbm_project_name_from_path(repo_path);
+    /* Reuse the name of an index that already covers this path so the same repo is
+     * not indexed twice under different names. */
+    char *existing = cbm_find_existing_project_name(repo_path);
+    p->project_name = existing ? existing : cbm_project_name_from_path(repo_path);
     (void)cbm_git_context_resolve(repo_path, &p->git_ctx);
     p->branch_qn = cbm_git_context_branch_qn(p->project_name, &p->git_ctx);
     p->mode = mode;
diff --git a/src/pipeline/project_resolve.c b/src/pipeline/project_resolve.c
new file mode 100644
index 000000000..2c9ac22fb
--- /dev/null
+++ b/src/pipeline/project_resolve.c
@@ -0,0 +1,188 @@
+/*
+ * project_resolve.c — Canonical path identity and duplicate-index prevention.
+ */
+#include "pipeline/project_resolve.h"
+#include "pipeline/pipeline.h"
+#include "foundation/platform.h"
+#include "foundation/compat_fs.h"
+#include "git/git_context.h"
+#include "store/store.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Resolve `path` to an absolute path in `out` (realpath on POSIX, _fullpath on
+ * Windows). Returns false for bad arguments, when resolution fails, or when the
+ * result does not fit in out_sz bytes; `out` is left empty on failure. */
+bool cbm_path_canonicalize(const char *path, char *out, size_t out_sz) {
+    if (!path || !out || out_sz == 0) {
+        return false;
+    }
+    out[0] = '\0';
+#ifdef _WIN32
+    if (!_fullpath(out, path, out_sz)) {
+        out[0] = '\0';
+        return false;
+    }
+    cbm_normalize_path_sep(out);
+#else
+    /* realpath(path, out) expects a PATH_MAX-sized buffer and ignores out_sz, so
+     * let it allocate the result and copy it with an explicit bound instead. */
+    char *resolved = realpath(path, NULL);
+    if (!resolved) {
+        return false;
+    }
+    int written = snprintf(out, out_sz, "%s", resolved);
+    free(resolved);
+    if (written < 0 || (size_t)written >= out_sz) {
+        out[0] = '\0';
+        return false;
+    }
+#endif
+    return out[0] != '\0';
+}
+
+/* Write the dedup identity of `repo_path` into `out`: the git canonical root when
+ * cbm_git_context_resolve() reports one, otherwise the canonicalized path.
+ * Returns false for bad arguments or when no key can be produced within out_sz. */
+bool cbm_project_identity_key(const char *repo_path, char *out, size_t out_sz) {
+    if (!repo_path || !out || out_sz == 0) {
+        return false;
+    }
+
+    cbm_git_context_t ctx = {0};
+    if (cbm_git_context_resolve(repo_path, &ctx) == 0 && ctx.canonical_root &&
+        ctx.canonical_root[0]) {
+        int written = snprintf(out, out_sz, "%s", ctx.canonical_root);
+        cbm_git_context_free(&ctx);
+        if (written < 0 || (size_t)written >= out_sz) {
+            /* A truncated key could compare equal to an unrelated root. */
+            out[0] = '\0';
+            return false;
+        }
+        cbm_normalize_path_sep(out);
+        return true;
+    }
+    cbm_git_context_free(&ctx);
+    return cbm_path_canonicalize(repo_path, out, out_sz);
+}
+
+/* True when `child` equals `parent` or lies beneath it on a '/' boundary, so
+ * "/a/bc" is not nested under "/a/b". Empty strings never match. */
+static bool identity_nested(const char *child, const char *parent) {
+    if (!child[0] || !parent[0]) {
+        return false;
+    }
+    size_t plen = strlen(parent);
+    if (strncmp(child, parent, plen) != 0) {
+        return false;
+    }
+    if (child[plen] == '\0') {
+        return true; /* identical paths */
+    }
+    /* A parent that already ends in '/' (e.g. the filesystem root) supplies the
+     * boundary itself; otherwise the child must continue with a separator. */
+    return parent[plen - 1] == '/' || child[plen] == '/';
+}
+
+/* True for directory entries named "<project>.db" with a non-empty project part.
+ * Names starting with '_' are skipped.
+ * NOTE(review): presumably those are internal databases — confirm. */
+static bool is_project_db_file(const char *name, size_t len) {
+    static const char ext[] = ".db";
+    const size_t ext_len = sizeof(ext) - 1;
+    if (len <= ext_len || strcmp(name + len - ext_len, ext) != 0) {
+        return false;
+    }
+    return name[0] != '_';
+}
+
+/* Scan the cache directory for a project database whose recorded root has the same
+ * identity as `repo_path` or contains it. When several indexed roots contain the
+ * path, the deepest (longest) root wins. Returns a heap-allocated project name the
+ * caller must free, or NULL when nothing matches. */
+char *cbm_find_existing_project_name(const char *repo_path) {
+    if (!repo_path || !repo_path[0]) {
+        return NULL;
+    }
+
+    char query_key[4096];
+    if (!cbm_project_identity_key(repo_path, query_key, sizeof(query_key))) {
+        return NULL;
+    }
+
+    /* Guard the lookup result before formatting it: "%s" with NULL is undefined. */
+    const char *resolved_cache = cbm_resolve_cache_dir();
+    if (!resolved_cache) {
+        return NULL;
+    }
+    char cache_dir[1024];
+    snprintf(cache_dir, sizeof(cache_dir), "%s", resolved_cache);
+
+    cbm_dir_t *d = cbm_opendir(cache_dir);
+    if (!d) {
+        return NULL;
+    }
+
+    char *best_name = NULL;
+    size_t best_root_len = 0;
+
+    cbm_dirent_t *entry;
+    while ((entry = cbm_readdir(d)) != NULL) {
+        const char *name = entry->name;
+        size_t len = strlen(name);
+        if (!is_project_db_file(name, len)) {
+            continue;
+        }
+
+        char db_path[2048];
+        int db_len = snprintf(db_path, sizeof(db_path), "%s/%s", cache_dir, name);
+        if (db_len < 0 || (size_t)db_len >= sizeof(db_path)) {
+            continue; /* never open a truncated path */
+        }
+
+        cbm_store_t *store = cbm_store_open_path(db_path);
+        if (!store) {
+            continue;
+        }
+
+        /* The project name is the file name without its ".db" suffix. */
+        char project_name[1024];
+        snprintf(project_name, sizeof(project_name), "%.*s", (int)(len - 3), name);
+
+        cbm_project_t proj = {0};
+        char indexed_key[4096];
+        bool has_key = cbm_store_get_project(store, project_name, &proj) == CBM_STORE_OK &&
+                       proj.root_path &&
+                       cbm_project_identity_key(proj.root_path, indexed_key, sizeof(indexed_key));
+
+        safe_str_free(&proj.name);
+        safe_str_free(&proj.indexed_at);
+        safe_str_free(&proj.root_path);
+        cbm_store_close(store);
+
+        if (!has_key) {
+            continue;
+        }
+
+        /* Reuse a name only when the queried path is the indexed root or sits beneath
+         * it (identity_nested also covers equality). An index rooted below the queried
+         * path is a narrower project and must not lend its name to the parent. */
+        if (!identity_nested(query_key, indexed_key)) {
+            continue;
+        }
+        size_t root_len = strlen(indexed_key);
+        if (!best_name || root_len > best_root_len) {
+            char *copy = strdup(project_name);
+            if (copy) { /* keep the previous candidate if allocation fails */
+                free(best_name);
+                best_name = copy;
+                best_root_len = root_len;
+            }
+        }
+    }
+
+    cbm_closedir(d);
+    return best_name;
+}
diff --git a/src/pipeline/project_resolve.h b/src/pipeline/project_resolve.h
new file mode 100644
index 000000000..d36b724fc
--- /dev/null
+++ b/src/pipeline/project_resolve.h
@@ -0,0 +1,19 @@
+#ifndef CBM_PROJECT_RESOLVE_H
+#define CBM_PROJECT_RESOLVE_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/* Canonicalize path (realpath / _fullpath) into out, writing at most out_sz bytes.
+ * Returns false if path is invalid or the result does not fit. */
+bool cbm_path_canonicalize(const char *path, char *out, size_t out_sz);
+
+/* Stable identity for dedup: git canonical_root when available, else canonical path.
+ * Returns false if no key can be produced within out_sz bytes. */
+bool cbm_project_identity_key(const char *repo_path, char *out, size_t out_sz);
+
+/* Return heap-allocated existing project name when repo_path matches a cached index
+ * (same identity or nested under an indexed root). Caller frees; NULL if no match. */
+char *cbm_find_existing_project_name(const char *repo_path);
+
+#endif
diff --git a/tests/test_project_resolve.c b/tests/test_project_resolve.c
new file mode 100644
index 000000000..874f728d9
--- /dev/null
+++ b/tests/test_project_resolve.c
@@ -0,0 +1,175 @@
+/*
+ * test_project_resolve.c — Canonical project identity and duplicate-index prevention.
+ */
+#include "../src/foundation/compat.h"
+#include "test_framework.h"
+#include "test_helpers.h"
+#include "pipeline/project_resolve.h"
+#include "pipeline/pipeline.h"
+#include
+
+#include
+#include
+#include
+#include
+
+/* Arguments for seed_project_db(): which database to create and what to record. */
+typedef struct {
+    const char *cache;
+    const char *project;
+    const char *root;
+} seed_ctx_t;
+
+/* Arguments for find_existing_project(): path to look up and where to store the result. */
+typedef struct {
+    const char *query_root;
+    char **found;
+} find_ctx_t;
+
+/* Arguments for open_pipeline_for_root(): repo path and where to store the pipeline. */
+typedef struct {
+    const char *root;
+    cbm_pipeline_t **pipeline;
+} pipeline_ctx_t;
+
+/* Run fn(ctx) with CBM_CACHE_DIR pointing at `cache`, then restore the previous
+ * value (or unset the variable if it had none). */
+static void with_cache_dir(const char *cache, void (*fn)(void *), void *ctx) {
+    const char *saved = getenv("CBM_CACHE_DIR");
+    char *saved_copy = saved ? strdup(saved) : NULL;
+    cbm_setenv("CBM_CACHE_DIR", cache, 1);
+    fn(ctx);
+    if (saved_copy) {
+        cbm_setenv("CBM_CACHE_DIR", saved_copy, 1);
+        free(saved_copy);
+    } else {
+        cbm_unsetenv("CBM_CACHE_DIR");
+    }
+}
+
+/* Create <cache>/<project>.db and record `root` as that project's root path. */
+static void seed_project_db(void *vctx) {
+    seed_ctx_t *ctx = (seed_ctx_t *)vctx;
+    char db_path[1024];
+    snprintf(db_path, sizeof(db_path), "%s/%s.db", ctx->cache, ctx->project);
+    cbm_store_t *store = cbm_store_open_path(db_path);
+    ASSERT_NOT_NULL(store);
+    ASSERT_EQ(cbm_store_upsert_project(store, ctx->project, ctx->root), CBM_STORE_OK);
+    cbm_store_close(store);
+}
+
+/* with_cache_dir() callback: look up an existing project for ctx->query_root. */
+static void find_existing_project(void *vctx) {
+    find_ctx_t *ctx = (find_ctx_t *)vctx;
+    *(ctx->found) = cbm_find_existing_project_name(ctx->query_root);
+}
+
+/* with_cache_dir() callback: construct a fast-mode pipeline for ctx->root. */
+static void open_pipeline_for_root(void *vctx) {
+    pipeline_ctx_t *ctx = (pipeline_ctx_t *)vctx;
+    *(ctx->pipeline) = cbm_pipeline_new(ctx->root, NULL, CBM_MODE_FAST);
+}
+
+TEST(project_resolve_path_canonicalize) {
+    char tmpdir[256];
+    snprintf(tmpdir, sizeof(tmpdir), "/tmp/cbm-projres-XXXXXX");
+    if (!cbm_mkdtemp(tmpdir))
+        FAIL("cbm_mkdtemp failed");
+
+    char file[512];
+    snprintf(file, sizeof(file), "%s/readme.txt", tmpdir);
+    th_write_file(file, "x");
+
+    char canon[1024];
+    ASSERT_TRUE(cbm_path_canonicalize(file, canon, sizeof(canon)));
+    ASSERT(strstr(canon, "readme.txt") != NULL);
+
+    test_rmdir_r(tmpdir);
+    PASS();
+}
+
+/* A result that does not fit in the caller's buffer must be rejected rather than
+ * written past its end. "." always resolves to a non-empty absolute path. */
+TEST(project_resolve_path_canonicalize_rejects_small_buffer) {
+    char tiny[1];
+    ASSERT(!cbm_path_canonicalize(".", tiny, sizeof(tiny)));
+    PASS();
+}
+
+/* NOTE(review): "/tmp/foo/bar" is never created. If cbm_git_context_resolve()
+ * reports no root for it, the key comes from realpath(), which fails on a missing
+ * path — confirm this test passes on a clean machine. */
+TEST(project_resolve_identity_key_stable) {
+    char key1[1024];
+    char key2[1024];
+    ASSERT_TRUE(cbm_project_identity_key("/tmp/foo/bar", key1, sizeof(key1)));
+    ASSERT_TRUE(cbm_project_identity_key("/tmp/foo/bar/", key2, sizeof(key2)));
+    ASSERT_STR_EQ(key1, key2);
+    PASS();
+}
+
+TEST(project_resolve_find_existing_by_root_path) {
+    char cache[256];
+    snprintf(cache, sizeof(cache), "/tmp/cbm-projres-cache-XXXXXX");
+    if (!cbm_mkdtemp(cache))
+        FAIL("cbm_mkdtemp failed");
+
+    char root[512];
+    snprintf(root, sizeof(root), "%s/repo-root", cache);
+    test_mkdirp(root);
+
+    seed_ctx_t seed = {.cache = cache, .project = "indexed-project", .root = root};
+    with_cache_dir(cache, seed_project_db, &seed);
+
+    char *found = NULL;
+    find_ctx_t fctx = {.query_root = root, .found = &found};
+    with_cache_dir(cache, find_existing_project, &fctx);
+
+    ASSERT_NOT_NULL(found);
+    ASSERT_STR_EQ(found, "indexed-project");
+    free(found);
+
+    char db_path[1024];
+    snprintf(db_path, sizeof(db_path), "%s/indexed-project.db", cache);
+    cbm_unlink(db_path);
+    test_rmdir_r(root);
+    cbm_rmdir(cache);
+    PASS();
+}
+
+TEST(project_resolve_pipeline_reuses_existing_name) {
+    char cache[256];
+    snprintf(cache, sizeof(cache), "/tmp/cbm-projres-pl-XXXXXX");
+    if (!cbm_mkdtemp(cache))
+        FAIL("cbm_mkdtemp failed");
+
+    char root[512];
+    snprintf(root, sizeof(root), "%s/worktree", cache);
+    test_mkdirp(root);
+
+    seed_ctx_t seed = {.cache = cache, .project = "canonical-name", .root = root};
+    with_cache_dir(cache, seed_project_db, &seed);
+
+    cbm_pipeline_t *p = NULL;
+    pipeline_ctx_t pctx = {.root = root, .pipeline = &p};
+    with_cache_dir(cache, open_pipeline_for_root, &pctx);
+
+    ASSERT_NOT_NULL(p);
+    ASSERT_STR_EQ(cbm_pipeline_project_name(p), "canonical-name");
+    cbm_pipeline_free(p);
+
+    char db_path[1024];
+    snprintf(db_path, sizeof(db_path), "%s/canonical-name.db", cache);
+    cbm_unlink(db_path);
+    test_rmdir_r(root);
+    cbm_rmdir(cache);
+    PASS();
+}
+
+SUITE(project_resolve) {
+    RUN_TEST(project_resolve_path_canonicalize);
+    RUN_TEST(project_resolve_path_canonicalize_rejects_small_buffer);
+    RUN_TEST(project_resolve_identity_key_stable);
+    RUN_TEST(project_resolve_find_existing_by_root_path);
+    RUN_TEST(project_resolve_pipeline_reuses_existing_name);
+}