Skip to content

Commit 9eea71f

Browse files
committed
Add PyCG sharding for level 2 analysis on large projects
- Introduces --pycg-shard/--no-pycg-shard to run PyCG independently per Python package root instead of over the whole project, with cross-package imports treated as ghost nodes.
- Adds --pycg-shard-ceiling (default 100) to skip shards with too many files, and --pycg-shard-timeout (default 120s) as a final safety net for packages whose pointer fixpoint diverges indefinitely.
- Adds test fixtures (decorators_and_hof, class_hierarchy, async_patterns, Flask 3.0.3, requests 2.31.0) and corresponding CLI tests with PyCG-specific edge assertions.

Verified on a 6086-file project: 74,008 PyCG edges produced across 748/753 shards; 5 deep-OO framework shards timed out and were gracefully skipped.

Signed-off-by: Saurabh Sinha <sinha108@gmail.com>
1 parent aab32e8 commit 9eea71f

357 files changed

Lines changed: 61566 additions & 89 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

codeanalyzer/__main__.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,48 @@ def main(
8080
verbosity: Annotated[
8181
int, typer.Option("-v", count=True, help="Increase verbosity: -v, -vv, -vvv")
8282
] = 0,
83+
pycg_shard: Annotated[
84+
bool,
85+
typer.Option(
86+
"--pycg-shard/--no-pycg-shard",
87+
help=(
88+
"Shard PyCG call-graph analysis by Python package (level 2 only). "
89+
"When the project exceeds the 500-file ceiling, PyCG is run "
90+
"independently per top-level package with cross-package imports "
91+
"treated as ghost nodes. Without this flag, projects over the "
92+
"ceiling fall back to Jedi-only edges."
93+
),
94+
),
95+
] = False,
96+
pycg_shard_ceiling: Annotated[
97+
int,
98+
typer.Option(
99+
"--pycg-shard-ceiling",
100+
help=(
101+
"Maximum files per shard when --pycg-shard is active (default 100). "
102+
"Shards exceeding this limit are skipped; their call edges are "
103+
"omitted from the call graph (Jedi edges for those packages are "
104+
"still included). Lower values are safer for packages with deep "
105+
"class hierarchies or heavy import graphs."
106+
),
107+
min=1,
108+
),
109+
] = 100,
110+
pycg_shard_timeout: Annotated[
111+
int,
112+
typer.Option(
113+
"--pycg-shard-timeout",
114+
help=(
115+
"Per-shard wall-clock timeout in seconds when --pycg-shard is "
116+
"active (default 120). A shard that exceeds this limit is skipped "
117+
"gracefully. PyCG's fixpoint is bimodal: it either converges "
118+
"quickly or diverges indefinitely, so the timeout acts as a final "
119+
"safety net after the file-count ceiling. Set to 0 to disable. "
120+
"POSIX only (macOS / Linux); ignored on Windows."
121+
),
122+
min=0,
123+
),
124+
] = 120,
83125
):
84126
options = AnalysisOptions(
85127
input=input,
@@ -93,6 +135,9 @@ def main(
93135
cache_dir=cache_dir,
94136
clear_cache=clear_cache,
95137
verbosity=verbosity,
138+
pycg_shard=pycg_shard,
139+
pycg_shard_ceiling=pycg_shard_ceiling,
140+
pycg_shard_timeout=pycg_shard_timeout,
96141
)
97142

98143
_set_log_level(options.verbosity)

codeanalyzer/core.py

Lines changed: 39 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -247,26 +247,33 @@ def __enter__(self) -> "Codeanalyzer":
247247
for dep_file, pip_args in dependency_files:
248248
if (self.project_dir / dep_file).exists():
249249
logger.info(f"Installing dependencies from {dep_file}")
250-
self._cmd_exec_helper(
251-
[str(venv_python), "-m", "pip", "install", "-U"] + pip_args + [str(self.project_dir / dep_file)],
252-
cwd=self.project_dir,
253-
check=True,
254-
)
250+
try:
251+
self._cmd_exec_helper(
252+
[str(venv_python), "-m", "pip", "install", "-U"] + pip_args + [str(self.project_dir / dep_file)],
253+
cwd=self.project_dir,
254+
check=True,
255+
)
256+
except subprocess.CalledProcessError as e:
257+
logger.warning(
258+
f"Dependency installation from {dep_file} failed (analysis will continue): {e}"
259+
)
255260

256261
# Handle Pipenv files
257262
if (self.project_dir / "Pipfile").exists():
258263
logger.info("Installing dependencies from Pipfile")
259-
# Note: This would require pipenv to be installed
260-
self._cmd_exec_helper(
261-
[str(venv_python), "-m", "pip", "install", "pipenv"],
262-
cwd=self.project_dir,
263-
check=True,
264-
)
265-
self._cmd_exec_helper(
266-
["pipenv", "install", "--dev"],
267-
cwd=self.project_dir,
268-
check=True,
269-
)
264+
try:
265+
self._cmd_exec_helper(
266+
[str(venv_python), "-m", "pip", "install", "pipenv"],
267+
cwd=self.project_dir,
268+
check=True,
269+
)
270+
self._cmd_exec_helper(
271+
["pipenv", "install", "--dev"],
272+
cwd=self.project_dir,
273+
check=True,
274+
)
275+
except subprocess.CalledProcessError as e:
276+
logger.warning(f"Pipenv dependency installation failed (analysis will continue): {e}")
270277

271278
# Handle conda environment files
272279
conda_files = ["conda.yml", "environment.yml"]
@@ -284,11 +291,14 @@ def __enter__(self) -> "Codeanalyzer":
284291

285292
if any((self.project_dir / file).exists() for file in package_definition_files):
286293
logger.info("Installing project in editable mode")
287-
self._cmd_exec_helper(
288-
[str(venv_python), "-m", "pip", "install", "-e", str(self.project_dir)],
289-
cwd=self.project_dir,
290-
check=True,
291-
)
294+
try:
295+
self._cmd_exec_helper(
296+
[str(venv_python), "-m", "pip", "install", "-e", str(self.project_dir)],
297+
cwd=self.project_dir,
298+
check=True,
299+
)
300+
except subprocess.CalledProcessError as e:
301+
logger.warning(f"Editable install failed (analysis will continue): {e}")
292302
else:
293303
logger.warning("No package definition files found, skipping editable installation")
294304

@@ -552,11 +562,18 @@ def _get_pycg_call_graph(
552562
continue with Jedi-only edges.
553563
"""
554564
try:
555-
pycg = PyCG(self.project_dir, skip_tests=self.skip_tests)
565+
pycg = PyCG(
566+
self.project_dir,
567+
skip_tests=self.skip_tests,
568+
shard=self.options.pycg_shard,
569+
shard_ceiling=self.options.pycg_shard_ceiling,
570+
shard_timeout=self.options.pycg_shard_timeout,
571+
)
556572
return pycg.build_call_graph_edges(symbol_table)
557573
except PyCGExceptions.PyCGImportError as exc:
558574
logger.warning(f"PyCG not installed — level 2 edges will be Jedi-only: {exc}")
559575
return []
560576
except PyCGExceptions.PyCGAnalysisError as exc:
561577
logger.warning(f"PyCG analysis failed — level 2 edges will be Jedi-only: {exc}")
578+
logger.debug("PyCG full traceback:", exc_info=True)
562579
return []

codeanalyzer/options/options.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,6 @@ class AnalysisOptions:
2222
cache_dir: Optional[Path] = None
2323
clear_cache: bool = False
2424
verbosity: int = 0
25+
pycg_shard: bool = False
26+
pycg_shard_ceiling: int = 100
27+
pycg_shard_timeout: int = 120

0 commit comments

Comments
 (0)