Skip to content

Commit 8898e4e

Browse files
authored
Merge pull request #50 from codellm-devkit/feat/jedi-shard-planner
Level 2: replace CodeQL with PyCG, add coupling-aware adaptive sharding
2 parents 2bae291 + f72ef00 commit 8898e4e

373 files changed

Lines changed: 63105 additions & 909 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

codeanalyzer/__main__.py

Lines changed: 90 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from codeanalyzer.utils import _set_log_level, logger
88
from codeanalyzer.config import OutputFormat
99
from codeanalyzer.schema import model_dump_json
10-
from codeanalyzer.options import AnalysisOptions, EmitTarget
10+
from codeanalyzer.options import AnalysisOptions, EmitTarget, ShardStrategy
1111

1212

1313
def main(
@@ -83,9 +83,16 @@ def main(
8383
help="Neo4j database name (default: server default). [env: NEO4J_DATABASE]",
8484
),
8585
] = None,
86-
using_codeql: Annotated[
87-
bool, typer.Option("--codeql/--no-codeql", help="Enable CodeQL-based analysis.")
88-
] = False,
86+
analysis_level: Annotated[
87+
int,
88+
typer.Option(
89+
"-a",
90+
"--analysis-level",
91+
help="Analysis depth: 1=symbol table+Jedi call graph, 2=+PyCG call graph.",
92+
min=1,
93+
max=2,
94+
),
95+
] = 1,
8996
using_ray: Annotated[
9097
bool,
9198
typer.Option("--ray/--no-ray", help="Enable Ray for distributed analysis."),
@@ -137,6 +144,78 @@ def main(
137144
verbosity: Annotated[
138145
int, typer.Option("-v", count=True, help="Increase verbosity: -v, -vv, -vvv")
139146
] = 0,
147+
pycg_shard: Annotated[
148+
bool,
149+
typer.Option(
150+
"--pycg-shard/--no-pycg-shard",
151+
help=(
152+
"Shard PyCG call-graph analysis by Python package (level 2 only). "
153+
"When the project exceeds the 500-file ceiling, PyCG is run "
154+
"independently per top-level package with cross-package imports "
155+
"treated as ghost nodes. Without this flag, projects over the "
156+
"ceiling fall back to Jedi-only edges."
157+
),
158+
),
159+
] = False,
160+
pycg_shard_ceiling: Annotated[
161+
int,
162+
typer.Option(
163+
"--pycg-shard-ceiling",
164+
help=(
165+
"Maximum files per shard when --pycg-shard is active (default 100). "
166+
"Shards exceeding this limit are skipped; their call edges are "
167+
"omitted from the call graph (Jedi edges for those packages are "
168+
"still included). Lower values are safer for packages with deep "
169+
"class hierarchies or heavy import graphs."
170+
),
171+
min=1,
172+
),
173+
] = 100,
174+
pycg_shard_timeout: Annotated[
175+
int,
176+
typer.Option(
177+
"--pycg-shard-timeout",
178+
help=(
179+
"Per-shard wall-clock timeout in seconds when --pycg-shard is "
180+
"active (default 120). A shard that exceeds this limit is skipped "
181+
"gracefully. PyCG's fixpoint is bimodal: it either converges "
182+
"quickly or diverges indefinitely, so the timeout acts as a final "
183+
"safety net after the file-count ceiling. Set to 0 to disable. "
184+
"POSIX only (macOS / Linux); ignored on Windows."
185+
),
186+
min=0,
187+
),
188+
] = 120,
189+
pycg_shard_strategy: Annotated[
190+
ShardStrategy,
191+
typer.Option(
192+
"--pycg-shard-strategy",
193+
help=(
194+
"How --pycg-shard groups files (level 2 only). 'jedi' (default) "
195+
"partitions the Jedi module-dependency graph (SCC + Louvain) so "
196+
"tightly-coupled modules co-compute and few call edges are "
197+
"severed between shards; import cycles are never split. "
198+
"'package' uses the legacy one-shard-per-package-directory "
199+
"grouping."
200+
),
201+
),
202+
] = ShardStrategy.JEDI,
203+
pycg_max_iter: Annotated[
204+
int,
205+
typer.Option(
206+
"--pycg-max-iter",
207+
help=(
208+
"Cap on PyCG's fixpoint passes per shard/project (level 2; "
209+
"default 50). PyCG iterates until its points-to state stops "
210+
"changing, but its access-path domain has no convergence bound, "
211+
"so heavy metaclass/mixin code (e.g. an ORM) can loop with each "
212+
"pass costing seconds. The cap returns a sound-but-incomplete "
213+
"call graph instead of looping until the timeout kills it. "
214+
"Set to -1 for PyCG's unbounded run-to-convergence behaviour."
215+
),
216+
min=-1,
217+
),
218+
] = 50,
140219
):
141220
options = AnalysisOptions(
142221
input=input,
@@ -148,7 +227,7 @@ def main(
148227
neo4j_user=neo4j_user,
149228
neo4j_password=neo4j_password,
150229
neo4j_database=neo4j_database,
151-
using_codeql=using_codeql,
230+
analysis_level=analysis_level,
152231
using_ray=using_ray,
153232
rebuild_analysis=rebuild_analysis,
154233
skip_tests=skip_tests,
@@ -157,6 +236,11 @@ def main(
157236
cache_dir=cache_dir,
158237
clear_cache=clear_cache,
159238
verbosity=verbosity,
239+
pycg_shard=pycg_shard,
240+
pycg_shard_ceiling=pycg_shard_ceiling,
241+
pycg_shard_timeout=pycg_shard_timeout,
242+
pycg_shard_strategy=pycg_shard_strategy,
243+
pycg_max_iter=pycg_max_iter,
160244
)
161245

162246
_set_log_level(options.verbosity)
@@ -230,7 +314,7 @@ def _write_output(artifacts, output_dir: Path, format: OutputFormat):
230314
app = typer.Typer(
231315
callback=main,
232316
name="canpy",
233-
help="Static Analysis on Python source code using Jedi, CodeQL and Tree sitter.",
317+
help="Static Analysis on Python source code using Jedi, PyCG and Tree sitter.",
234318
invoke_without_command=True,
235319
no_args_is_help=True,
236320
add_completion=False,

0 commit comments

Comments
 (0)