Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile.cbm
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ GRAPH_BUFFER_SRCS = src/graph_buffer/graph_buffer.c
# Pipeline module (new)
PIPELINE_SRCS = \
src/pipeline/fqn.c \
src/pipeline/project_resolve.c \
src/pipeline/path_alias.c \
src/pipeline/registry.c \
src/pipeline/pipeline.c \
Expand Down Expand Up @@ -332,7 +333,7 @@ TEST_DISCOVER_SRCS = \

TEST_GRAPH_BUFFER_SRCS = tests/test_graph_buffer.c

TEST_PIPELINE_SRCS = tests/test_registry.c tests/test_pipeline.c tests/test_fqn.c tests/test_route_canon.c tests/test_path_alias.c tests/test_configlink.c tests/test_infrascan.c tests/test_worker_pool.c tests/test_parallel.c
TEST_PIPELINE_SRCS = tests/test_registry.c tests/test_pipeline.c tests/test_fqn.c tests/test_route_canon.c tests/test_path_alias.c tests/test_configlink.c tests/test_infrascan.c tests/test_worker_pool.c tests/test_parallel.c tests/test_project_resolve.c

TEST_WATCHER_SRCS = tests/test_watcher.c

Expand Down
16 changes: 12 additions & 4 deletions src/mcp/mcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ enum {
#include <sqlite3.h>
#include "cypher/cypher.h"
#include "pipeline/pipeline.h"
#include "pipeline/project_resolve.h"
#include "pipeline/pass_cross_repo.h"
#include "git/git_context.h"
#include "cli/cli.h"
Expand Down Expand Up @@ -4568,10 +4569,17 @@ static void detect_session(cbm_mcp_server_t *srv) {
* used by the pipeline, otherwise session queries look for a .db file
* that doesn't match the indexed project name. */
if (srv->session_root[0]) {
char *pname = cbm_project_name_from_path(srv->session_root);
if (pname) {
snprintf(srv->session_project, sizeof(srv->session_project), "%s", pname);
free(pname);
char *existing = cbm_find_existing_project_name(srv->session_root);
if (existing) {
snprintf(srv->session_project, sizeof(srv->session_project), "%s", existing);
cbm_log_info("session.project.reuse", "project", existing, "path", srv->session_root);
free(existing);
} else {
char *pname = cbm_project_name_from_path(srv->session_root);
if (pname) {
snprintf(srv->session_project, sizeof(srv->session_project), "%s", pname);
free(pname);
}
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion src/pipeline/pipeline.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
enum { CBM_DIR_PERMS = 0755, PL_RING = 4, PL_RING_MASK = 3, PL_SEQ_PASSES = 6, PL_WAL_BUF = 1040 };
#define PL_NSEC_PER_SEC 1000000000LL
#include "pipeline/pipeline.h"
#include "pipeline/project_resolve.h"
#include "pipeline/artifact.h"
#include "pipeline/pipeline_internal.h"
#include "pipeline/pass_lsp_cross.h"
Expand Down Expand Up @@ -153,7 +154,8 @@ cbm_pipeline_t *cbm_pipeline_new(const char *repo_path, const char *db_path,

p->repo_path = strdup(repo_path);
p->db_path = db_path ? strdup(db_path) : NULL;
p->project_name = cbm_project_name_from_path(repo_path);
char *existing = cbm_find_existing_project_name(repo_path);
p->project_name = existing ? existing : cbm_project_name_from_path(repo_path);
(void)cbm_git_context_resolve(repo_path, &p->git_ctx);
p->branch_qn = cbm_git_context_branch_qn(p->project_name, &p->git_ctx);
p->mode = mode;
Expand Down
148 changes: 148 additions & 0 deletions src/pipeline/project_resolve.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/*
* project_resolve.c — Canonical path identity and duplicate-index prevention.
*/
#include "pipeline/project_resolve.h"
#include "pipeline/pipeline.h"
#include "foundation/platform.h"
#include "foundation/compat_fs.h"
#include "git/git_context.h"
#include "store/store.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Resolve `path` to an absolute, canonical form and store it in `out`.
 *
 * POSIX:   uses realpath(), which fails when the path cannot be resolved
 *          (for example, when a component does not exist).
 * Windows: uses _fullpath() and then normalizes separators with
 *          cbm_normalize_path_sep().
 *
 * path    Path to canonicalize; NULL is rejected.
 * out     Destination buffer; NULL is rejected.
 * out_sz  Capacity of `out` in bytes, including the terminating NUL; 0 is
 *         rejected.
 *
 * Returns true when a non-empty canonical path was written to `out`.
 * Returns false otherwise; when `out` is usable it is left as an empty string.
 */
bool cbm_path_canonicalize(const char *path, char *out, size_t out_sz) {
    if (!path || !out || out_sz == 0) {
        return false;
    }
    out[0] = '\0';
#ifdef _WIN32
    if (!_fullpath(out, path, out_sz)) {
        out[0] = '\0';
        return false;
    }
    cbm_normalize_path_sep(out);
#else
    /* realpath(path, buf) knows nothing about out_sz and may write up to
     * PATH_MAX bytes, so resolving straight into `out` can overflow a smaller
     * caller buffer. Let realpath allocate the result and copy it only when
     * it fits. */
    char *resolved = realpath(path, NULL);
    if (!resolved) {
        return false;
    }
    size_t resolved_len = strlen(resolved);
    if (resolved_len >= out_sz) {
        free(resolved);
        return false;
    }
    memcpy(out, resolved, resolved_len + 1);
    free(resolved);
#endif
    return out[0] != '\0';
}

/*
 * Produce the identity key used to decide whether two paths refer to the same
 * indexed project.
 *
 * When cbm_git_context_resolve() succeeds and reports a non-empty
 * canonical_root, that root (with separators normalized) is the key.
 * Otherwise the key is cbm_path_canonicalize(repo_path).
 *
 * repo_path  Path to identify; NULL is rejected.
 * out        Destination buffer; NULL is rejected.
 * out_sz     Capacity of `out` in bytes; 0 is rejected.
 *
 * Returns true when a key was written to `out`. Returns false on invalid
 * arguments, when the git root does not fit in `out`, or when the fallback
 * canonicalization fails.
 */
bool cbm_project_identity_key(const char *repo_path, char *out, size_t out_sz) {
    if (!repo_path || !out || out_sz == 0) {
        return false;
    }

    cbm_git_context_t ctx = {0};
    bool have_git_root = cbm_git_context_resolve(repo_path, &ctx) == 0 &&
                         ctx.canonical_root && ctx.canonical_root[0];
    if (have_git_root) {
        int written = snprintf(out, out_sz, "%s", ctx.canonical_root);
        cbm_git_context_free(&ctx);
        /* A truncated key is worse than no key: it could compare equal to, or
         * look like a parent of, an unrelated project's key. */
        if (written < 0 || (size_t)written >= out_sz) {
            out[0] = '\0';
            return false;
        }
        cbm_normalize_path_sep(out);
        return true;
    }

    cbm_git_context_free(&ctx);
    return cbm_path_canonicalize(repo_path, out, out_sz);
}

/*
 * Report whether `child` is the same path as `parent` or lies beneath it.
 *
 * Both arguments must be non-NULL, '/'-separated path strings. The comparison
 * is component-wise: "/a/bc" is not nested under "/a/b". A parent that
 * already ends in '/' (the filesystem root "/", or a key carrying a trailing
 * separator) is treated as a directory prefix as-is, so "/usr" is nested
 * under "/". Empty strings never match.
 */
static bool identity_nested(const char *child, const char *parent) {
    if (!child[0] || !parent[0]) {
        return false;
    }
    size_t plen = strlen(parent);
    if (strncmp(child, parent, plen) != 0) {
        return false;
    }
    if (child[plen] == '\0') {
        return true; /* identical paths */
    }
    /* The shared prefix must end on a component boundary: either the parent
     * supplies the separator itself or the child continues with one. */
    return parent[plen - 1] == '/' || child[plen] == '/';
}

/*
 * Decide whether a cache-directory entry names a project database.
 *
 * name  Entry name (no directory part).
 * len   strlen(name), supplied by the caller.
 *
 * Accepts names of at least five characters that end in ".db" and do not
 * start with '_'. Note that the length floor also rejects a four-character
 * name such as "a.db".
 */
static bool is_project_db_file(const char *name, size_t len) {
    return len >= 5 && name[0] != '_' && strcmp(name + len - 3, ".db") == 0;
}

/*
 * Look through the cache directory for an already-indexed project that
 * `repo_path` belongs to, so the caller can reuse its name instead of creating
 * a second index for the same tree.
 *
 * Every "<name>.db" entry accepted by is_project_db_file() is opened, its
 * project record for <name> is read, and the identity key of the recorded
 * root_path is compared with the identity key of `repo_path`. An entry matches
 * when its root is the same as the query or contains it. If several entries
 * match, the one with the longest root key (the most specific root) wins.
 *
 * repo_path  Path being indexed or queried; NULL and "" yield NULL.
 *
 * Returns a heap-allocated project name that the caller must free(), or NULL
 * when nothing matches, the identity key cannot be computed, or the cache
 * directory cannot be resolved or opened.
 */
char *cbm_find_existing_project_name(const char *repo_path) {
    if (!repo_path || !repo_path[0]) {
        return NULL;
    }

    char query_key[4096];
    if (!cbm_project_identity_key(repo_path, query_key, sizeof(query_key))) {
        return NULL;
    }

    /* Guard the resolver result before handing it to "%s". */
    const char *resolved_cache = cbm_resolve_cache_dir();
    if (!resolved_cache || !resolved_cache[0]) {
        return NULL;
    }
    char cache_dir[1024];
    int cache_len = snprintf(cache_dir, sizeof(cache_dir), "%s", resolved_cache);
    if (cache_len < 0 || (size_t)cache_len >= sizeof(cache_dir)) {
        return NULL; /* a truncated path would scan some other directory */
    }

    cbm_dir_t *d = cbm_opendir(cache_dir);
    if (!d) {
        return NULL;
    }

    char *best_name = NULL;
    size_t best_root_len = 0;

    cbm_dirent_t *entry;
    while ((entry = cbm_readdir(d)) != NULL) {
        const char *name = entry->name;
        size_t len = strlen(name);
        if (!is_project_db_file(name, len)) {
            continue;
        }

        /* Project name is the file name without its ".db" suffix. */
        char project_name[1024];
        size_t stem_len = len - 3;
        if (stem_len >= sizeof(project_name)) {
            continue;
        }
        memcpy(project_name, name, stem_len);
        project_name[stem_len] = '\0';

        char db_path[2048];
        int db_len = snprintf(db_path, sizeof(db_path), "%s/%s", cache_dir, name);
        if (db_len < 0 || (size_t)db_len >= sizeof(db_path)) {
            continue;
        }

        cbm_store_t *store = cbm_store_open_path(db_path);
        if (!store) {
            continue;
        }

        cbm_project_t proj = {0};
        char indexed_key[4096];
        bool has_key = false;
        if (cbm_store_get_project(store, project_name, &proj) == CBM_STORE_OK && proj.root_path) {
            has_key = cbm_project_identity_key(proj.root_path, indexed_key, sizeof(indexed_key));
        }

        safe_str_free(&proj.name);
        safe_str_free(&proj.indexed_at);
        safe_str_free(&proj.root_path);
        cbm_store_close(store);

        if (!has_key) {
            continue;
        }

        /* Only an indexed root that equals or contains the query path counts.
         * An index rooted *below* the query path is a different, narrower
         * project; adopting its name would let a parent-directory index take
         * over the child's database. identity_nested() also covers equality. */
        if (!identity_nested(query_key, indexed_key)) {
            continue;
        }

        size_t root_len = strlen(indexed_key);
        if (best_name && root_len <= best_root_len) {
            continue;
        }

        /* Duplicate first so an allocation failure keeps the previous best. */
        char *candidate = strdup(project_name);
        if (!candidate) {
            continue;
        }
        free(best_name);
        best_name = candidate;
        best_root_len = root_len;
    }

    cbm_closedir(d);
    return best_name;
}
17 changes: 17 additions & 0 deletions src/pipeline/project_resolve.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef CBM_PROJECT_RESOLVE_H
#define CBM_PROJECT_RESOLVE_H

#include <stdbool.h>
#include <stddef.h>

/* Canonicalize path into out (realpath on POSIX, _fullpath plus separator
 * normalization on Windows).
 *
 * path    Path to canonicalize.
 * out     Destination buffer for the NUL-terminated result.
 * out_sz  Capacity of out in bytes.
 *
 * Returns true when a non-empty canonical path was produced. Returns false
 * when path or out is NULL, out_sz is 0, or the platform call fails; on POSIX
 * realpath fails for paths that cannot be resolved, e.g. missing ones. */
bool cbm_path_canonicalize(const char *path, char *out, size_t out_sz);

/* Stable identity for dedup: git canonical_root when available, else canonical path.
 *
 * repo_path  Path to identify.
 * out        Destination buffer for the NUL-terminated key.
 * out_sz     Capacity of out in bytes.
 *
 * Returns true when a key was written to out. Returns false when repo_path or
 * out is NULL, out_sz is 0, or no git root is available and canonicalizing
 * repo_path fails. */
bool cbm_project_identity_key(const char *repo_path, char *out, size_t out_sz);

/* Return heap-allocated existing project name when repo_path matches a cached index
 * (same identity or nested under an indexed root). Caller frees; NULL if no match.
 *
 * Candidates are the project databases found in the resolved cache directory.
 * When more than one cached index matches, the one whose root identity key is
 * longest is returned. NULL is also returned when repo_path is NULL or empty,
 * its identity key cannot be computed, or the cache directory cannot be opened. */
char *cbm_find_existing_project_name(const char *repo_path);

#endif
155 changes: 155 additions & 0 deletions tests/test_project_resolve.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/*
* test_project_resolve.c — Canonical project identity and duplicate-index prevention.
*/
#include "../src/foundation/compat.h"
#include "test_framework.h"
#include "test_helpers.h"
#include "pipeline/project_resolve.h"
#include "pipeline/pipeline.h"
#include <store/store.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

typedef struct {
const char *cache;
const char *project;
const char *root;
} seed_ctx_t;

typedef struct {
const char *query_root;
char **found;
} find_ctx_t;

typedef struct {
const char *root;
cbm_pipeline_t **pipeline;
} pipeline_ctx_t;

/*
 * Run fn(ctx) while the CBM_CACHE_DIR environment variable points at `cache`,
 * then put the environment back: the previous value is restored if there was
 * one, otherwise the variable is unset again.
 */
static void with_cache_dir(const char *cache, void (*fn)(void *), void *ctx) {
    /* Copy the old value: the pointer from getenv() is not ours to keep once
     * the variable is overwritten below. */
    const char *previous = getenv("CBM_CACHE_DIR");
    char *previous_copy = NULL;
    if (previous) {
        previous_copy = strdup(previous);
    }

    cbm_setenv("CBM_CACHE_DIR", cache, 1);
    fn(ctx);

    if (!previous_copy) {
        cbm_unsetenv("CBM_CACHE_DIR");
        return;
    }
    cbm_setenv("CBM_CACHE_DIR", previous_copy, 1);
    free(previous_copy);
}

/*
 * with_cache_dir() callback: create "<cache>/<project>.db" and record a
 * project row mapping `project` to `root`. `vctx` points at a seed_ctx_t.
 *
 * NOTE(review): ASSERT_* macros are used here inside a void callback rather
 * than a TEST body; confirm the framework's failure path is valid in that
 * position.
 */
static void seed_project_db(void *vctx) {
    seed_ctx_t *ctx = vctx;

    char seeded_db[1024];
    snprintf(seeded_db, sizeof(seeded_db), "%s/%s.db", ctx->cache, ctx->project);

    cbm_store_t *store = cbm_store_open_path(seeded_db);
    ASSERT_NOT_NULL(store);
    ASSERT_EQ(cbm_store_upsert_project(store, ctx->project, ctx->root), CBM_STORE_OK);
    cbm_store_close(store);
}

/*
 * with_cache_dir() callback: look up the existing project for
 * find_ctx_t.query_root and store the (heap-allocated or NULL) result through
 * find_ctx_t.found.
 */
static void find_existing_project(void *vctx) {
    find_ctx_t *lookup = vctx;
    char *match = cbm_find_existing_project_name(lookup->query_root);
    *lookup->found = match;
}

static void open_pipeline_for_root(void *vctx) {
pipeline_ctx_t *ctx = (pipeline_ctx_t *)vctx;
*(ctx->pipeline) = cbm_pipeline_new(ctx->root, NULL, CBM_MODE_FAST);
}

/* Canonicalizing an existing file succeeds and keeps the file name. */
TEST(project_resolve_path_canonicalize) {
    char tmpdir[256] = "/tmp/cbm-projres-XXXXXX";
    if (!cbm_mkdtemp(tmpdir)) {
        FAIL("cbm_mkdtemp failed");
    }

    /* The path must exist for canonicalization, so create a real file. */
    char file[512];
    snprintf(file, sizeof(file), "%s/readme.txt", tmpdir);
    th_write_file(file, "x");

    char canon[1024];
    ASSERT_TRUE(cbm_path_canonicalize(file, canon, sizeof(canon)));
    ASSERT(strstr(canon, "readme.txt") != NULL);

    test_rmdir_r(tmpdir);
    PASS();
}

/*
 * The identity key must not depend on a trailing slash.
 *
 * The check runs against a freshly created temp directory rather than a
 * hard-coded path: the canonicalization fallback uses realpath(), which fails
 * for paths that do not exist, so a fixed path would make the result depend on
 * the state of the machine running the test.
 */
TEST(project_resolve_identity_key_stable) {
    char dir[256] = "/tmp/cbm-projres-key-XXXXXX";
    if (!cbm_mkdtemp(dir)) {
        FAIL("cbm_mkdtemp failed");
    }

    char dir_slash[512];
    snprintf(dir_slash, sizeof(dir_slash), "%s/", dir);

    char key1[4096];
    char key2[4096];
    bool ok1 = cbm_project_identity_key(dir, key1, sizeof(key1));
    bool ok2 = cbm_project_identity_key(dir_slash, key2, sizeof(key2));

    /* Clean up before asserting so a failure does not leave the directory. */
    test_rmdir_r(dir);

    ASSERT_TRUE(ok1);
    ASSERT_TRUE(ok2);
    ASSERT_STR_EQ(key1, key2);
    PASS();
}

TEST(project_resolve_find_existing_by_root_path) {
char cache[256];
snprintf(cache, sizeof(cache), "/tmp/cbm-projres-cache-XXXXXX");
if (!cbm_mkdtemp(cache))
FAIL("cbm_mkdtemp failed");

char root[512];
snprintf(root, sizeof(root), "%s/repo-root", cache);
test_mkdirp(root);

seed_ctx_t seed = {.cache = cache, .project = "indexed-project", .root = root};
with_cache_dir(cache, seed_project_db, &seed);

char *found = NULL;
find_ctx_t fctx = {.query_root = root, .found = &found};
with_cache_dir(cache, find_existing_project, &fctx);

ASSERT_NOT_NULL(found);
ASSERT_STR_EQ(found, "indexed-project");
free(found);

char db_path[1024];
snprintf(db_path, sizeof(db_path), "%s/indexed-project.db", cache);
cbm_unlink(db_path);
test_rmdir_r(root);
cbm_rmdir(cache);
PASS();
}

TEST(project_resolve_pipeline_reuses_existing_name) {
char cache[256];
snprintf(cache, sizeof(cache), "/tmp/cbm-projres-pl-XXXXXX");
if (!cbm_mkdtemp(cache))
FAIL("cbm_mkdtemp failed");

char root[512];
snprintf(root, sizeof(root), "%s/worktree", cache);
test_mkdirp(root);

seed_ctx_t seed = {.cache = cache, .project = "canonical-name", .root = root};
with_cache_dir(cache, seed_project_db, &seed);

cbm_pipeline_t *p = NULL;
pipeline_ctx_t pctx = {.root = root, .pipeline = &p};
with_cache_dir(cache, open_pipeline_for_root, &pctx);

ASSERT_NOT_NULL(p);
ASSERT_STR_EQ(cbm_pipeline_project_name(p), "canonical-name");
cbm_pipeline_free(p);

char db_path[1024];
snprintf(db_path, sizeof(db_path), "%s/canonical-name.db", cache);
cbm_unlink(db_path);
test_rmdir_r(root);
cbm_rmdir(cache);
PASS();
}

/* Suite entry point: runs the project_resolve tests in the order listed —
 * path canonicalization, identity-key stability, lookup of a seeded index by
 * root path, and project-name reuse by a newly created pipeline. */
SUITE(project_resolve) {
RUN_TEST(project_resolve_path_canonicalize);
RUN_TEST(project_resolve_identity_key_stable);
RUN_TEST(project_resolve_find_existing_by_root_path);
RUN_TEST(project_resolve_pipeline_reuses_existing_name);
}
Loading