|
| 1 | +(ns clojure-mcp.tools.deps-grep.core |
| 2 | + "Core implementation for searching dependency jars. |
| 3 | + Uses clojure CLI for classpath resolution and ripgrep for searching." |
| 4 | + (:require |
| 5 | + [clojure.string :as str] |
| 6 | + [clojure.java.shell :as shell] |
| 7 | + [clojure.java.io :as io] |
| 8 | + [clojure-mcp.tools.deps-common.jar-utils :as jar-utils] |
| 9 | + [clojure-mcp.tools.deps-sources.core :as deps-sources] |
| 10 | + [clojure-mcp.utils.shell :as shell-utils] |
| 11 | + [taoensso.timbre :as log])) |
| 12 | + |
(def ^:private classpath-cache
  "Atom holding a map of project directory -> vector of base classpath jar
  paths resolved for that project."
  (atom {}))

(def ^:private library-jars-cache
  "Atom holding a map of [project-dir library java-sources?] -> the jars
  (plus any sources jars) selected for that library filter."
  (atom {}))
| 18 | + |
(defn rg-available?
  "Report whether the ripgrep executable (`rg`) can be found, as decided by
  `shell-utils/binary-available?`."
  []
  (let [binary "rg"]
    (shell-utils/binary-available? binary)))
| 23 | + |
(defn check-required-binaries!
  "Verify that every mandatory external program is installed.

  Each required binary is probed through `shell-utils/binary-available?`
  using the probe arguments listed alongside it. Returns nil when nothing is
  missing; otherwise returns a map with a human-readable :error message and
  the vector of absent program names under :missing-binaries."
  []
  (let [required {"clojure" ["-Sdescribe"]}
        absent   (vec (for [[binary probe-args] required
                            :when (not (apply shell-utils/binary-available? binary probe-args))]
                        binary))]
    (if (empty? absent)
      nil
      {:error (str "Required binaries not found: " (str/join ", " absent)
                   ". Please install them to use deps_grep.")
       :missing-binaries absent})))
| 37 | + |
(defn get-classpath-jars
  "Run `clojure -Spath` in the given directory and return a vector of jar paths.

  Only classpath entries that end in `.jar` and exist on disk are returned;
  directories and missing files are skipped.

  Returns nil if the command exits non-zero or throws (the failure is logged)."
  [project-dir]
  (log/debug "Resolving classpath for:" project-dir)
  (try
    (let [result (shell/with-sh-dir project-dir
                   (shell/sh "clojure" "-Spath"))]
      (if (zero? (:exit result))
        (let [;; The command's stdout ends with a line terminator. Without
              ;; stripping it the final entry would be "….jar\n", fail the
              ;; `.jar` suffix check below, and be silently dropped.
              classpath (str/trim-newline (:out result))
              ;; Use platform-specific path separator (: on Unix, ; on Windows),
              ;; quoted so it is always treated as a literal.
              path-sep (re-pattern (java.util.regex.Pattern/quote java.io.File/pathSeparator))
              jars (->> (str/split classpath path-sep)
                        (filter #(str/ends-with? % ".jar"))
                        (filter #(.exists (io/file %)))
                        vec)]
          (log/debug "Found" (count jars) "jars on classpath")
          jars)
        (do
          (log/warn "clojure -Spath failed:" (:err result))
          nil)))
    (catch Exception e
      (log/error e "Failed to resolve classpath")
      nil)))
| 62 | + |
(defn find-sources-jar
  "Locate the sibling `-sources.jar` for a jar path.

  The candidate is the same path with the trailing `.jar` replaced by
  `-sources.jar`. Returns that candidate path when the file exists on disk,
  and nil when it does not or when `jar-path` does not end in `.jar`."
  [jar-path]
  (let [suffix ".jar"]
    (when (str/ends-with? jar-path suffix)
      (let [candidate (str/replace jar-path #"\.jar$" "-sources.jar")]
        (when (.exists (io/file candidate))
          candidate)))))
| 71 | + |
(defn needs-java-sources?
  "Check if the search options indicate we're looking for Java files.

  True when :type is exactly \"java\" or when :glob contains the text
  `.java`. Always returns a strict boolean (never the matched substring or
  nil), so the result is safe to use as part of a cache key."
  [{:keys [type glob]}]
  (boolean
   (or (= "java" type)
       (and glob (re-find #"\.java" glob)))))
| 77 | + |
(defn get-jars-with-sources
  "Extend a jar list with whatever sources jars are available.

  Sources jars found next to the given jars (via `find-sources-jar`) and not
  already present in `jars` are always appended. When `opts` describes a Java
  search (see `needs-java-sources?`), the jars that have no such local sources
  jar are additionally handed to `deps-sources/ensure-sources-jars!`, and
  whatever it returns is appended as well.

  Returns a vector: the original jars first, then local sources, then any
  sources returned by `ensure-sources-jars!`."
  [jars opts]
  (let [known-jars    (set jars)
        local-sources (remove known-jars (keep find-sources-jar jars))
        base          (vec jars)]
    (if-not (needs-java-sources? opts)
      ;; Non-Java search: only what is already on disk.
      (into base local-sources)
      ;; Java search: also try to obtain sources for jars lacking them.
      (let [covered   (into #{}
                            (map #(str/replace % #"-sources\.jar$" ".jar"))
                            local-sources)
            uncovered (remove covered jars)]
        (log/debug "Checking for Java sources for" (count uncovered) "jars")
        (let [fetched (deps-sources/ensure-sources-jars! uncovered)]
          (log/debug "Downloaded" (count fetched) "sources jars")
          (-> base
              (into local-sources)
              (into fetched)))))))
| 97 | + |
(defn parse-library-filter
  "Split a library filter string at its first `/`.

  Returns {:group \"group.id\"} when there is no slash, or
  {:group \"group.id\" :artifact \"name\"} when there is one. Anything after
  the first slash (including further slashes) becomes the artifact."
  [library]
  (let [[group artifact :as parts] (str/split library #"/" 2)]
    (cond-> {:group group}
      (= 2 (count parts)) (assoc :artifact artifact))))
| 106 | + |
(defn filter-jars-by-library
  "Keep only the jars that belong to the requested library.

  `library` is parsed with `parse-library-filter`: a bare group ID keeps every
  artifact in that group, while group/artifact keeps only that exact artifact.
  Coordinates for each jar come from `deps-sources/parse-maven-coords`; jars
  for which it returns nothing are dropped. Returns a vector."
  [jars library]
  (let [{wanted-group :group wanted-artifact :artifact} (parse-library-filter library)
        matches? (fn [jar-path]
                   (let [coords (deps-sources/parse-maven-coords jar-path)]
                     (cond
                       (not coords)                          false
                       (not= wanted-group (:group coords))   false
                       (nil? wanted-artifact)                true
                       :else (= wanted-artifact (:artifact coords)))))]
    (into [] (filter matches?) jars)))
| 119 | + |
(defn cached-base-jars
  "Return the base classpath jars for `project-dir`, consulting the cache first.

  On a cache miss the jars are resolved with `get-classpath-jars`; a successful
  result is stored in `classpath-cache` before being returned. A failed
  resolution (nil) is not cached and nil is returned."
  [project-dir]
  (if-let [hit (get @classpath-cache project-dir)]
    hit
    (when-let [resolved (get-classpath-jars project-dir)]
      (swap! classpath-cache assoc project-dir resolved)
      resolved)))
| 127 | + |
(defn clear-classpath-cache!
  "Empty both the base-classpath cache and the per-library jars cache.
  Useful after deps changes."
  []
  (let [empty-cache {}]
    (reset! classpath-cache empty-cache)
    (reset! library-jars-cache empty-cache)))
| 133 | + |
(defn list-jar-entries
  "Return the entry paths contained in the jar at `jar-path`.

  Thin delegate to `jar-utils/list-jar-entries`, which is expected to yield a
  vector of entry paths, or nil on error."
  [jar-path]
  (-> jar-path
      jar-utils/list-jar-entries))
| 139 | + |
(defn glob-matches?
  "Check if a path matches a glob pattern.

  Supports simple patterns like *.clj, *.{clj,cljs} and the single-character
  wildcard `?` (e.g. core?.clj). A nil pattern matches every path.

  The glob is translated to a regex that is anchored at the END of the path
  only, so a pattern matches when it matches a suffix of `path`; `*` and `?`
  may also match `/`."
  [pattern path]
  (if-not pattern
    true
    (let [;; Convert glob to regex, escaping all regex metacharacters first
          ;; then restoring glob wildcards
          pattern-regex (-> pattern
                            (str/replace #"[.+^$|()\\]" "\\\\$0")
                            ;; `?` is a glob wildcard for exactly one character.
                            ;; Left untranslated it would act as a regex
                            ;; quantifier (making the preceding char optional,
                            ;; or throwing when it leads the pattern).
                            (str/replace "?" ".")
                            (str/replace "*" ".*")
                            (str/replace #"\{([^}]+)\}"
                                         (fn [[_ alts]]
                                           (str "(" (str/replace alts "," "|") ")"))))]
      (boolean (re-find (re-pattern (str pattern-regex "$")) path)))))
| 155 | + |
(defn type-to-glob
  "Turn a file type such as \"clj\" into the glob \"*.clj\".
  Returns nil when no type is given."
  [type-str]
  (if type-str
    (str "*." type-str)
    nil))
| 161 | + |
(defn filter-entries
  "Narrow jar entries using the :glob or :type option.

  An explicit :glob wins; otherwise :type is converted to a glob with
  `type-to-glob`. When neither is supplied the entries are returned unchanged."
  [entries {:keys [glob type]}]
  (if-let [wanted (or glob (type-to-glob type))]
    (filter (fn [entry] (glob-matches? wanted entry)) entries)
    entries))
| 169 | + |
(defn search-jar-entry-rg
  "Search using ripgrep. Reads jar entry via Java, pipes content to rg via stdin.
  Supports context lines and multiline patterns.

  Options read from the opts map: :case-insensitive (-i), :multiline (-U),
  :context-before (-B), :context-after (-A) and :context (-C).

  Returns {:jar :entry :matches} when rg reports at least one parsable line,
  where each match is {:line-num :content :match?} and :match? is false for
  context lines. Returns nil when the entry cannot be read, rg finds nothing,
  rg fails, or an exception is thrown (logged at debug level)."
  [jar-path entry-path pattern {:keys [case-insensitive context-before context-after
                                       context multiline]}]
  (try
    (when-let [content (jar-utils/read-jar-entry jar-path entry-path)]
      (let [rg-args (cond-> ["rg" "-n"]
                      case-insensitive (conj "-i")
                      multiline (conj "-U")
                      context-before (conj "-B" (str context-before))
                      context-after (conj "-A" (str context-after))
                      context (conj "-C" (str context))
                      ;; Pass the pattern through -e so that patterns that
                      ;; start with a dash (e.g. "->" or "->>") are treated
                      ;; as the regex rather than parsed as rg flags.
                      true (conj "-e" pattern))
            result (apply shell/sh (concat rg-args [:in content]))
            exit (:exit result)]
        (cond
          (zero? exit)
          (let [matches (->> (str/split-lines (:out result))
                             (keep (fn [line]
                                     ;; With -n, rg prints "N:text" for matching
                                     ;; lines and "N-text" for context lines;
                                     ;; other lines (e.g. "--") are ignored.
                                     (when-let [[_ line-num sep text]
                                                (re-matches #"(\d+)([:|-])(.*)$" line)]
                                       {:line-num (parse-long line-num)
                                        :content text
                                        :match? (= sep ":")}))))]
            (when (seq matches)
              {:jar jar-path
               :entry entry-path
               :matches (vec matches)}))

          ;; rg exits 1 for "no match"; anything higher is a real failure
          ;; (such as an invalid regex), which is worth surfacing in the log.
          (> exit 1)
          (do
            (log/debug "rg failed for" entry-path "in" jar-path ":" (:err result))
            nil)

          :else nil)))
    (catch Exception e
      (log/debug "Error searching" entry-path "in" jar-path ":" (.getMessage e))
      nil)))
| 200 | + |
(defn search-jar-entry-fallback
  "Pure-JVM search used when ripgrep is unavailable.

  Reads the entry with `jar-utils/read-jar-entry`, splits it into lines and
  tests each line against `pattern` (prefixed with `(?i)` when
  :case-insensitive is set). Context lines and multiline matching are not
  supported, so every reported line has :match? true.

  Returns {:jar :entry :matches} when at least one line matches, otherwise
  nil. Exceptions are logged at debug level and produce nil."
  [jar-path entry-path pattern {:keys [case-insensitive]}]
  (try
    (when-let [text (jar-utils/read-jar-entry jar-path entry-path)]
      (let [regex (re-pattern (cond->> pattern
                                case-insensitive (str "(?i)")))
            hits  (into []
                        (comp
                         ;; pair every line with its 1-based line number
                         (map-indexed (fn [i line] [(inc i) line]))
                         (filter (fn [[_ line]] (re-find regex line)))
                         (map (fn [[n line]]
                                {:line-num n
                                 :content line
                                 :match? true})))
                        (str/split-lines text))]
        (when (seq hits)
          {:jar jar-path
           :entry entry-path
           :matches hits})))
    (catch Exception e
      (log/debug "Error searching" entry-path "in" jar-path ":" (.getMessage e))
      nil)))
| 225 | + |
(defn search-jar-entry
  "Search one entry inside a jar, picking the backend at call time.

  When `rg-available?` reports ripgrep is present the ripgrep-backed search is
  used; otherwise the Clojure regex fallback runs (no context or multiline
  support).

  The result is a map with :jar, :entry, and :matches, where every match
  carries :line-num, :content, and :match? (true for real matches, false for
  context lines)."
  [jar-path entry-path pattern opts]
  (let [search-fn (if (rg-available?)
                    search-jar-entry-rg
                    search-jar-entry-fallback)]
    (search-fn jar-path entry-path pattern opts)))
| 236 | + |
(defn deps-grep
  "Search for a pattern in dependency jars.

  Arguments:
  - project-dir: Directory containing deps.edn
  - pattern: Regex pattern to search for
  - opts: Map of options
    :library - Required. Maven group or group/artifact to search
    :glob - Filter files by glob pattern (e.g., \"*.clj\")
    :type - Filter files by type (e.g., \"clj\", \"java\")
    :output-mode - :content (default), :files-with-matches, or :count
    :case-insensitive - Case insensitive search
    :line-numbers - Include line numbers (default true for content mode);
                    not consulted here, matches always carry :line-num
    :context-before - Lines before match
    :context-after - Lines after match
    :context - Lines before and after
    :head-limit - Limit number of results. The limit is checked after each
                  matching file, so the final file may push the total past it.
    :multiline - Enable multiline matching

  Returns a map with :results and optionally :truncated (set when the
  head-limit stopped the search). In :count mode the map also has :count.
  On failure returns a map with :error instead: missing binaries, a missing
  or blank :library, an unresolvable classpath, or no jar matching :library.

  Requires: clojure CLI. Optional: ripgrep (rg) for context/multiline."
  [project-dir pattern opts]
  (let [library (:library opts)]
    (if-let [binary-error (check-required-binaries!)]
      binary-error
      (if (or (not (string? library)) (str/blank? library))
        ;; :library is mandatory; without this guard a nil value would reach
        ;; str/split inside parse-library-filter and throw instead of
        ;; producing an error map like every other failure path here.
        {:error (str "The :library option is required (Maven group or group/artifact). "
                     "Use deps_list to see available libraries.")}
        (let [base-jars (cached-base-jars project-dir)]
          (if-not base-jars
            {:error "Failed to resolve classpath. Is this a deps.edn project?"}
            (let [cache-key [project-dir library (needs-java-sources? opts)]
                  filtered-jars (filter-jars-by-library base-jars library)]
              (if (empty? filtered-jars)
                {:error (str "No libraries found matching: " library
                             ". Use deps_list to see available libraries.")}
                (let [;; Get jars with sources (cached per library)
                      jars (or (get @library-jars-cache cache-key)
                               (let [result (get-jars-with-sources filtered-jars opts)]
                                 (swap! library-jars-cache assoc cache-key result)
                                 result))
                      {:keys [output-mode head-limit]
                       :or {output-mode :content}} opts
                      all-results (atom [])
                      result-count (atom 0)
                      limit-reached (atom false)]
                  ;; Search each jar, stopping early once the head-limit is hit
                  (doseq [jar jars
                          :while (not @limit-reached)]
                    (when-let [entries (list-jar-entries jar)]
                      (let [filtered-entries (filter-entries entries opts)]
                        (doseq [entry filtered-entries
                                :while (not @limit-reached)]
                          (when-let [match (search-jar-entry jar entry pattern opts)]
                            (case output-mode
                              ;; one result (and one count) per matching file
                              :files-with-matches
                              (do
                                (swap! all-results conj {:jar (:jar match)
                                                         :entry (:entry match)})
                                (swap! result-count inc))

                              ;; only tally lines; :results stays empty
                              :count
                              (swap! result-count + (count (:matches match)))

                              ;; :content (default)
                              (do
                                (swap! all-results conj match)
                                (swap! result-count + (count (:matches match)))))

                            (when (and head-limit (>= @result-count head-limit))
                              (reset! limit-reached true)))))))

                  (cond-> {:results @all-results}
                    (= output-mode :count) (assoc :count @result-count)
                    @limit-reached (assoc :truncated true)))))))))))
| 310 | + |
0 commit comments