Skip to content

Commit 53aa1fd

Browse files
committed
feat(java): add a read-only Neo4j analysis backend (parity with Python/TypeScript)
Adds cldk.analysis.java.neo4j.JNeo4jBackend, a read-only Cypher client that reconstructs the canonical JApplication from the graph that codeanalyzer-java (>=2.4.0) emits with --emit neo4j, then answers all 36 JavaAnalysisBackend queries with the in-memory backend's logic. It is selected by passing a Neo4jConnectionConfig as the backend= config to CLDK.java()/JavaAnalysis (the JavaBackend union is extended accordingly).

Verified 1-to-1 against JCodeanalyzer on the daytrader8 sample: everything the graph contains reconstructs identically. Three producer-side gaps in the 2.4.0 emitter are tracked upstream (codeanalyzer-java#156/#157/#158); they are not query bugs.

- backend_config: JavaBackend = Union[CodeAnalyzerConfig, Neo4jConnectionConfig]
- java_analysis/core: select JNeo4jBackend by config type; allow project_path=None
- bump codeanalyzer-java pin to 2.4.0 (adds the Neo4j emitter)
- tests: selection + ABC contract (run anywhere) + skip-by-default daytrader parity
- README: mermaid + backends table show Java's read-only Neo4j backend

Closes #167
1 parent e971175 commit 53aa1fd

13 files changed

Lines changed: 1445 additions & 220 deletions

CHANGELOG.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6363
where calls to a bare module name that is also imported (e.g. `os`/`re`/`json`) are dropped from
6464
the emitted call graph. `PythonAnalysis` / `CLDK.analysis(language="python")` accept the same
6565
optional `neo4j_config`.
66-
- Bumped `codeanalyzer-python` to `0.2.0` (adds the Neo4j graph emitter).
66+
- Read-only Neo4j-backed **Java** analysis backend (`cldk.analysis.java.neo4j.JNeo4jBackend`),
67+
completing Neo4j parity across all three languages. It reconstructs the canonical `JApplication`
68+
from the graph `codeanalyzer-java` (>= 2.4.0) emits with `--emit neo4j` and answers all 36
69+
`JavaAnalysisBackend` queries with the in-memory backend's logic. Verified against the daytrader8
70+
sample (145 classes): everything the graph actually contains reconstructs identically to
71+
`JCodeanalyzer`. Three producer-side gaps in the 2.4.0 emitter make the graph an incomplete
72+
projection (tracked upstream, not query-layer bugs): all fields of a class collapse to one node
73+
(codeanalyzer-java#156), imports lose the type name (codeanalyzer-java#157), and `J_CALLS`
74+
materializes only a fraction of the call graph (codeanalyzer-java#158). `JavaAnalysis` /
75+
`CLDK.java(...)` accept a `Neo4jConnectionConfig` as the `backend=` config to select it.
76+
- Bumped `codeanalyzer-python` to `0.2.0` (adds the Neo4j graph emitter); bumped `codeanalyzer-java`
77+
to `2.4.0` (adds the Neo4j graph emitter).
6778
- Optional `neo4j` extra (`pip install cldk[neo4j]`) for the Neo4j Python driver.
6879

6980
## [v1.0.7] - 2026-02-14

README.md

Lines changed: 124 additions & 193 deletions
Large diffs are not rendered by default.

cldk/analysis/commons/backend_config.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,8 @@ class Neo4jConnectionConfig:
9999
application_name: str | None = None
100100

101101

102-
# Per-language discriminated unions the facades match on. Java has no Neo4j backend yet, so its
103-
# only admissible config is the codeanalyzer one.
104-
JavaBackend = CodeAnalyzerConfig
102+
# Per-language discriminated unions the facades match on.
103+
JavaBackend = Union[CodeAnalyzerConfig, Neo4jConnectionConfig]
105104
PyBackend = Union[PyCodeAnalyzerConfig, Neo4jConnectionConfig]
106105
TSBackend = Union[CodeAnalyzerConfig, Neo4jConnectionConfig]
107106

cldk/analysis/java/java_analysis.py

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,13 @@
5252

5353
from tree_sitter import Tree
5454

55-
from cldk.analysis.commons.backend_config import CodeAnalyzerConfig, JavaBackend, cache_subdir
55+
from cldk.analysis.commons.backend_config import CodeAnalyzerConfig, JavaBackend, Neo4jConnectionConfig, cache_subdir
5656
from cldk.analysis.commons.treesitter import TreesitterJava
5757
from cldk.models.java import JCallable
5858
from cldk.models.java import JApplication
5959
from cldk.models.java.models import JCRUDOperation, JComment, JCompilationUnit, JMethodDetail, JType, JField
6060
from cldk.analysis.java.codeanalyzer import JCodeanalyzer
61+
from cldk.analysis.java.neo4j import JNeo4jBackend
6162
from cldk.analysis.java.backend import JavaAnalysisBackend
6263

6364

@@ -149,22 +150,33 @@ def __init__(
149150
self.eager_analysis = eager_analysis
150151
self.target_files = target_files
151152
self.backend_config: JavaBackend = backend if backend is not None else CodeAnalyzerConfig()
152-
# Java has a single backend family; the config only carries the cache root. analysis.json
153-
# is cached under <cache_dir>/java (None in source_code mode, where the analyzer streams
154-
# results over a pipe).
155-
cache_path = cache_subdir(self.backend_config.cache_dir, project_dir, "java")
156-
if cache_path is not None:
157-
cache_path.mkdir(parents=True, exist_ok=True)
158153
self.treesitter_java: TreesitterJava = TreesitterJava()
159-
# Initialize the analysis backend
160-
self.backend: JavaAnalysisBackend = JCodeanalyzer(
161-
project_dir=self.project_dir,
162-
source_code=self.source_code,
163-
eager_analysis=self.eager_analysis,
164-
analysis_level=self.analysis_level,
165-
analysis_json_path=cache_path,
166-
target_files=self.target_files,
167-
)
154+
self.backend: JavaAnalysisBackend
155+
if isinstance(self.backend_config, Neo4jConnectionConfig):
156+
# Read-only: the graph is populated out of band; the SDK only queries it.
157+
cfg = self.backend_config
158+
application_name = cfg.application_name or (Path(project_dir).name if project_dir else None)
159+
self.backend = JNeo4jBackend(
160+
neo4j_uri=cfg.uri,
161+
neo4j_username=cfg.username,
162+
neo4j_password=cfg.password,
163+
neo4j_database=cfg.database,
164+
application_name=application_name,
165+
)
166+
else:
167+
# The config only carries the cache root. analysis.json is cached under <cache_dir>/java
168+
# (None in source_code mode, where the analyzer streams results over a pipe).
169+
cache_path = cache_subdir(self.backend_config.cache_dir, project_dir, "java")
170+
if cache_path is not None:
171+
cache_path.mkdir(parents=True, exist_ok=True)
172+
self.backend = JCodeanalyzer(
173+
project_dir=self.project_dir,
174+
source_code=self.source_code,
175+
eager_analysis=self.eager_analysis,
176+
analysis_level=self.analysis_level,
177+
analysis_json_path=cache_path,
178+
target_files=self.target_files,
179+
)
168180

169181
def get_imports(self) -> List[str]:
170182
"""Return all import statements in the source code.
cldk/analysis/java/neo4j/__init__.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
################################################################################
2+
# Copyright IBM Corporation 2026
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
################################################################################
16+
17+
"""Read-only Neo4j-backed Java analysis backend (Cypher queries over the codeanalyzer-java graph)."""
18+
19+
from cldk.analysis.java.neo4j.config import Neo4jConnectionConfig
20+
from cldk.analysis.java.neo4j.neo4j_backend import JNeo4jBackend
21+
22+
__all__ = ["JNeo4jBackend", "Neo4jConnectionConfig"]

cldk/analysis/java/neo4j/config.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
################################################################################
2+
# Copyright IBM Corporation 2026
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
################################################################################
16+
17+
"""Connection settings for the read-only Neo4j-backed Java analysis backend.
18+
19+
The definition has been hoisted to :mod:`cldk.analysis.commons.backend_config`; it is re-exported
20+
here for symmetry with the Python and TypeScript backends.
21+
"""
22+
23+
from __future__ import annotations
24+
25+
from cldk.analysis.commons.backend_config import Neo4jConnectionConfig
26+
27+
__all__ = ["Neo4jConnectionConfig"]

0 commit comments

Comments
 (0)