Skip to content

Commit 1a2cdcc

Browse files
authored
Merge pull request #49 from codellm-devkit/feat/jedi-shard-planner
PyCG sharding: Jedi planner + adaptive decomposition of runaways
2 parents 0799828 + f72ef00 commit 1a2cdcc

8 files changed

Lines changed: 1106 additions & 26 deletions

File tree

codeanalyzer/__main__.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from codeanalyzer.utils import _set_log_level, logger
88
from codeanalyzer.config import OutputFormat
99
from codeanalyzer.schema import model_dump_json
10-
from codeanalyzer.options import AnalysisOptions, EmitTarget
10+
from codeanalyzer.options import AnalysisOptions, EmitTarget, ShardStrategy
1111

1212

1313
def main(
@@ -186,6 +186,36 @@ def main(
186186
min=0,
187187
),
188188
] = 120,
189+
pycg_shard_strategy: Annotated[
190+
ShardStrategy,
191+
typer.Option(
192+
"--pycg-shard-strategy",
193+
help=(
194+
"How --pycg-shard groups files (level 2 only). 'jedi' (default) "
195+
"partitions the Jedi module-dependency graph (SCC + Louvain) so "
196+
"tightly-coupled modules co-compute and few call edges are "
197+
"severed between shards; import cycles are never split. "
198+
"'package' uses the legacy one-shard-per-package-directory "
199+
"grouping."
200+
),
201+
),
202+
] = ShardStrategy.JEDI,
203+
pycg_max_iter: Annotated[
204+
int,
205+
typer.Option(
206+
"--pycg-max-iter",
207+
help=(
208+
"Cap on PyCG's fixpoint passes per shard/project (level 2; "
209+
"default 50). PyCG iterates until its points-to state stops "
210+
"changing, but its access-path domain has no convergence bound, "
211+
"so heavy metaclass/mixin code (e.g. an ORM) can loop with each "
212+
"pass costing seconds. The cap returns a sound-but-incomplete "
213+
"call graph instead of looping until the timeout kills it. "
214+
"Set to -1 for PyCG's unbounded run-to-convergence behaviour."
215+
),
216+
min=-1,
217+
),
218+
] = 50,
189219
):
190220
options = AnalysisOptions(
191221
input=input,
@@ -209,6 +239,8 @@ def main(
209239
pycg_shard=pycg_shard,
210240
pycg_shard_ceiling=pycg_shard_ceiling,
211241
pycg_shard_timeout=pycg_shard_timeout,
242+
pycg_shard_strategy=pycg_shard_strategy,
243+
pycg_max_iter=pycg_max_iter,
212244
)
213245

214246
_set_log_level(options.verbosity)

codeanalyzer/core.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -433,8 +433,9 @@ def analyze(self) -> PyApplication:
433433
logger.info("✅ Jedi: %d edges in %.1fs", len(call_graph), time.perf_counter() - t0_jedi)
434434

435435
if self.analysis_level >= 2:
436-
# Level 2: also add PyCG edges.
437-
pycg_edges = self._get_pycg_call_graph(symbol_table)
436+
# Level 2: also add PyCG edges. The Jedi edges double as the
437+
# coupling graph that drives coupling-aware PyCG sharding.
438+
pycg_edges = self._get_pycg_call_graph(symbol_table, jedi_edges)
438439
call_graph = merge_edges(call_graph, pycg_edges)
439440

440441
call_graph = filter_external_edges(call_graph, symbol_table)
@@ -661,13 +662,18 @@ def _build_symbol_table(self, cached_symbol_table: Optional[Dict[str, PyModule]]
661662
def _get_pycg_call_graph(
662663
self,
663664
symbol_table: Dict[str, PyModule],
665+
jedi_edges: List[PyCallEdge],
664666
) -> List[PyCallEdge]:
665667
"""Build PyCG-resolved call edges.
666668
667669
Runs PyCG's iterative name-pointer analysis over the whole project
668670
and returns edges with ``provenance=["pycg"]``. Falls back to an
669671
empty list and logs a warning on any failure so the caller can
670672
continue with Jedi-only edges.
673+
674+
*jedi_edges* are the level-1 call edges; under the ``jedi`` shard
675+
strategy they drive coupling-aware partitioning (see
676+
:func:`shard_planner.plan_shards`).
671677
"""
672678
try:
673679
pycg = PyCG(
@@ -676,9 +682,11 @@ def _get_pycg_call_graph(
676682
shard=self.options.pycg_shard,
677683
shard_ceiling=self.options.pycg_shard_ceiling,
678684
shard_timeout=self.options.pycg_shard_timeout,
685+
shard_strategy=self.options.pycg_shard_strategy,
686+
max_iter=self.options.pycg_max_iter,
679687
using_ray=self.using_ray,
680688
)
681-
return pycg.build_call_graph_edges(symbol_table)
689+
return pycg.build_call_graph_edges(symbol_table, jedi_edges=jedi_edges)
682690
except PyCGExceptions.PyCGImportError as exc:
683691
logger.warning(f"PyCG not installed — level 2 edges will be Jedi-only: {exc}")
684692
return []

codeanalyzer/options/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
from .options import AnalysisOptions, EmitTarget, OutputFormat
1+
from .options import AnalysisOptions, EmitTarget, OutputFormat, ShardStrategy
22

3-
__all__ = ["AnalysisOptions", "EmitTarget", "OutputFormat"]
3+
__all__ = ["AnalysisOptions", "EmitTarget", "OutputFormat", "ShardStrategy"]

codeanalyzer/options/options.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,20 @@ class EmitTarget(str, Enum):
2323
SCHEMA = "schema"
2424

2525

26+
class ShardStrategy(str, Enum):
27+
"""How ``--pycg-shard`` groups files into shards (level 2 only).
28+
29+
- ``jedi`` : partition the Jedi module-dependency graph (strongly-
30+
connected-component condensation + Louvain) so tightly-
31+
coupled modules co-compute and few call edges are severed
32+
between shards. Import cycles are never split.
33+
- ``package`` : legacy one-shard-per-package-directory grouping.
34+
"""
35+
36+
JEDI = "jedi"
37+
PACKAGE = "package"
38+
39+
2640
@dataclass
2741
class AnalysisOptions:
2842
input: Path
@@ -46,3 +60,5 @@ class AnalysisOptions:
4660
pycg_shard: bool = False
4761
pycg_shard_ceiling: int = 100
4862
pycg_shard_timeout: int = 120
63+
pycg_shard_strategy: ShardStrategy = ShardStrategy.JEDI
64+
pycg_max_iter: int = 50

0 commit comments

Comments
 (0)