1515 merge_edges ,
1616 resolve_unresolved_constructors ,
1717)
18- from codeanalyzer .semantic_analysis .codeql import CodeQLLoader
19- from codeanalyzer .semantic_analysis .codeql .codeql_analysis import CodeQL
20- from codeanalyzer .semantic_analysis .codeql .codeql_exceptions import CodeQLExceptions
18+ from codeanalyzer .semantic_analysis .pycg import PyCG , PyCGExceptions
2119from codeanalyzer .syntactic_analysis .exceptions import SymbolTableBuilderRayError
2220from codeanalyzer .syntactic_analysis .symbol_table_builder import SymbolTableBuilder
2321from codeanalyzer .utils import ProgressBar
@@ -48,7 +46,7 @@ def _process_file_with_ray(py_file: Union[Path, str], project_dir: Union[Path, s
4846
4947
5048class Codeanalyzer :
51- """Core functionality for CodeQL analysis .
49+ """Core static analysis engine for Python projects .
5250
5351 Args:
5452 options (AnalysisOptions): Analysis configuration options containing all necessary parameters.
@@ -58,15 +56,12 @@ def __init__(self, options: AnalysisOptions) -> None:
5856 self .options = options
5957 self .project_dir = Path (options .input ).resolve ()
6058 self .skip_tests = options .skip_tests
61- self .using_codeql = options .using_codeql
59+ self .analysis_level = options .analysis_level
6260 self .rebuild_analysis = options .rebuild_analysis
6361 self .cache_dir = (
6462 options .cache_dir .resolve () if options .cache_dir is not None else self .project_dir
6563 ) / ".codeanalyzer"
6664 self .clear_cache = options .clear_cache
67- self .db_path : Optional [Path ] = None
68- self .codeql_bin : Optional [Path ] = None
69- self .codeql_packs_dir : Optional [Path ] = None
7065 self .virtualenv : Optional [Path ] = None
7166 self .using_ray : bool = options .using_ray
7267 self .file_name : Optional [Path ] = options .file_name
@@ -297,60 +292,6 @@ def __enter__(self) -> "Codeanalyzer":
297292 else :
298293 logger .warning ("No package definition files found, skipping editable installation" )
299294
300- if self .using_codeql :
301- logger .info (f"(Re-)initializing CodeQL analysis for { self .project_dir } " )
302-
303- # Resolve the CLI binary before anything else uses it: DB build
304- # below needs it, and so does every subsequent query run.
305- self .codeql_bin = self ._ensure_codeql_bin ()
306- # Download the standard query library pack (idempotent). The
307- # CLI install ships only the language extractors; the
308- # ``codeql/python-all`` library pack must be fetched separately.
309- self .codeql_packs_dir = self ._ensure_codeql_packs (self .codeql_bin )
310-
311- cache_root = self .cache_dir / "codeql"
312- cache_root .mkdir (parents = True , exist_ok = True )
313- self .db_path = cache_root / f"{ self .project_dir .name } -db"
314- self .db_path .mkdir (exist_ok = True )
315-
316- checksum_file = self .db_path / ".checksum"
317- current_checksum = self ._compute_checksum (self .project_dir )
318-
319- def is_cache_valid () -> bool :
320- if not (self .db_path / "db-python" ).exists ():
321- return False
322- if not checksum_file .exists ():
323- return False
324- return checksum_file .read_text ().strip () == current_checksum
325-
326- if self .rebuild_analysis or not is_cache_valid ():
327- logger .info ("Creating new CodeQL database..." )
328-
329- cmd = [
330- str (self .codeql_bin ),
331- "database" ,
332- "create" ,
333- str (self .db_path ),
334- f"--source-root={ self .project_dir } " ,
335- "--language=python" ,
336- "--overwrite" ,
337- ]
338-
339- proc = subprocess .Popen (
340- cmd , stdout = subprocess .DEVNULL , stderr = subprocess .PIPE
341- )
342- _ , err = proc .communicate ()
343-
344- if proc .returncode != 0 :
345- raise CodeQLExceptions .CodeQLDatabaseBuildException (
346- f"Error building CodeQL database:\n { err .decode ()} "
347- )
348-
349- checksum_file .write_text (current_checksum )
350-
351- else :
352- logger .info (f"Reusing cached CodeQL DB at { self .db_path } " )
353-
354295 return self
355296
356297 def __exit__ (self , * args , ** kwargs ) -> None :
@@ -378,24 +319,22 @@ def analyze(self) -> PyApplication:
378319 # Build symbol table from cached application if available (if not available, then build a new one)
379320 symbol_table = self ._build_symbol_table (cached_pyapplication .symbol_table if cached_pyapplication else {})
380321
381- # Build the call graph in four steps:
382- # 1. Run CodeQL (when enabled). Produces resolved edges with
383- # ``provenance=["codeql"]`` and augments ``PyCallsite``s
384- # in-place — filling ``callee_signature`` for sites Jedi
385- # couldn't resolve.
386- # 2. Heuristic fallback for constructor calls neither Jedi nor
387- # CodeQL could resolve (commonly classes nested inside
388- # functions). Walks the symbol table by class short-name +
389- # scope and writes ``<class>.__init__`` into the site.
390- # 3. Derive Jedi edges from the now-fully-augmented symbol
391- # table — these reflect every resolution the symbol table
392- # contains, regardless of which pass put it there.
393- # 4. Merge with CodeQL edges; provenance unions for edges both
394- # backends saw.
395- codeql_edges = self ._get_call_graph (symbol_table , augment_sites = True )
322+ # Level 1: symbol table only — constructor heuristic still runs to
323+ # enrich PyCallsite.callee_signature inside the symbol table itself,
324+ # but no call_graph edge list is produced.
396325 resolve_unresolved_constructors (symbol_table )
397- jedi_edges = jedi_call_graph_edges (symbol_table )
398- call_graph = merge_edges (jedi_edges , codeql_edges )
326+
327+ call_graph = []
328+ if self .analysis_level >= 2 :
329+ # Level 2: build call graph.
330+ # 1. Derive Jedi edges from the augmented symbol table.
331+ # 2. Run PyCG (iterative name-pointer analysis) for additional
332+ # edges — particularly locally-scoped function calls and
333+ # higher-order patterns that Jedi misses.
334+ # 3. Merge; provenance unions for edges seen by both backends.
335+ jedi_edges = jedi_call_graph_edges (symbol_table )
336+ pycg_edges = self ._get_pycg_call_graph (symbol_table )
337+ call_graph = merge_edges (jedi_edges , pycg_edges )
399338
400339 # Recreate pyapplication
401340 app = PyApplication .builder ().symbol_table (symbol_table ).call_graph (call_graph ).build ()
@@ -601,120 +540,23 @@ def _build_symbol_table(self, cached_symbol_table: Optional[Dict[str, PyModule]]
601540 logger .info ("✅ Symbol table generation complete." )
602541 return symbol_table
603542
604- def _ensure_codeql_packs (self , codeql_bin : Path ) -> Path :
605- """Materialize a qlpack that depends on ``codeql/python-all``.
606-
607- The CodeQL CLI install ships only the language extractors — query
608- library packs (and their transitive dependencies like
609- ``codeql/concepts``) must be resolved separately. The canonical
610- way is to declare the dependency in a ``qlpack.yml`` and run
611- ``codeql pack install`` in that directory; CodeQL writes a
612- ``codeql-pack.lock.yml`` and downloads everything needed.
613-
614- We do this once per project under ``<cache_dir>/codeql/qlpack/``
615- and return that directory. The query runner then writes its
616- temporary ``.ql`` file inside this pack — colocation makes
617- ``import python`` resolve without any ``--additional-packs`` or
618- ``--search-path`` gymnastics.
619- """
620- pack_dir = self .cache_dir / "codeql" / "qlpack"
621- pack_dir .mkdir (parents = True , exist_ok = True )
622- qlpack_yml = pack_dir / "qlpack.yml"
623- lock_file = pack_dir / "codeql-pack.lock.yml"
624-
625- if not qlpack_yml .exists ():
626- qlpack_yml .write_text (
627- "name: codeanalyzer-deps\n "
628- "version: 1.0.0\n "
629- "dependencies:\n "
630- ' codeql/python-all: "*"\n '
631- )
632-
633- if lock_file .exists ():
634- logger .debug (f"CodeQL pack dependencies already installed in { pack_dir } " )
635- return pack_dir
636-
637- logger .info (f"Installing CodeQL pack dependencies in { pack_dir } ." )
638- proc = subprocess .Popen (
639- [str (codeql_bin ), "pack" , "install" , str (pack_dir )],
640- stdout = subprocess .PIPE ,
641- stderr = subprocess .PIPE ,
642- )
643- _ , err = proc .communicate ()
644- if proc .returncode != 0 :
645- raise CodeQLExceptions .CodeQLDatabaseBuildException (
646- f"Failed to install CodeQL pack dependencies:\n "
647- f"{ (err or b'' ).decode (errors = 'replace' )} "
648- )
649- return pack_dir
650-
651- def _ensure_codeql_bin (self ) -> Path :
652- """Locate (or download) the CodeQL CLI binary into the project cache.
653-
654- Resolution order:
655- 1. An existing binary inside ``<cache_dir>/codeql/bin/`` —
656- reused across runs on the same project.
657- 2. ``codeql`` already on the user's PATH — picked up verbatim.
658- 3. Otherwise, download into ``<cache_dir>/codeql/bin/``.
659-
660- The project-local cache is preferred over PATH so the version we
661- installed earlier wins over whatever the OS ships — keeps behavior
662- deterministic when the user has both.
663- """
664- bin_root = self .cache_dir / "codeql" / "bin"
665- bin_root .mkdir (parents = True , exist_ok = True )
666-
667- existing = next (
668- (p for p in bin_root .rglob ("codeql" ) if p .is_file ()),
669- None ,
670- )
671- if existing and os .access (existing , os .X_OK ):
672- logger .debug (f"Reusing cached CodeQL CLI at { existing } " )
673- return existing .resolve ()
674-
675- on_path = shutil .which ("codeql" )
676- if on_path :
677- logger .debug (f"Using CodeQL CLI from PATH at { on_path } " )
678- return Path (on_path )
679-
680- logger .info (f"CodeQL CLI not found; downloading into { bin_root } ." )
681- downloaded = CodeQLLoader .download_and_extract_codeql (bin_root )
682- if not downloaded .exists () or not os .access (downloaded , os .X_OK ):
683- raise FileNotFoundError (
684- f"CodeQL binary not executable after download: { downloaded } "
685- )
686- return downloaded
687-
688- def _get_call_graph (
543+ def _get_pycg_call_graph (
689544 self ,
690545 symbol_table : Dict [str , PyModule ],
691- augment_sites : bool = False ,
692546 ) -> List [PyCallEdge ]:
693- """Build CodeQL-resolved call edges and optionally augment sites.
694-
695- Returns an empty list when CodeQL isn't enabled or the database
696- isn't available. Edges carry ``provenance=["codeql"]`` — merge
697- with Jedi-derived edges via ``call_graph.merge_edges``.
547+ """Build PyCG-resolved call edges.
698548
699- When ``augment_sites`` is True, also mutates
700- ``PyCallable.call_sites`` in the symbol table to backfill
701- ``callee_signature`` for sites Jedi couldn't resolve. The single
702- CodeQL query is shared (cached on the ``CodeQL`` instance) so
703- this costs no extra DB work.
549+ Runs PyCG's iterative name-pointer analysis over the whole project
550+ and returns edges with ``provenance=["pycg"]``. Falls back to an
551+ empty list and logs a warning when PyCG is not installed or its
552+ analysis fails, so the caller can continue with Jedi-only edges.
704553 """
705- if not self .using_codeql or self .db_path is None :
706- return []
707554 try :
708- cq = CodeQL (
709- self .project_dir ,
710- self .db_path ,
711- codeql_bin = self .codeql_bin ,
712- codeql_packs_dir = self .codeql_packs_dir ,
713- )
714- edges = cq .build_call_graph_edges (symbol_table )
715- if augment_sites :
716- cq .augment_call_sites (symbol_table )
717- return edges
718- except Exception as exc :
719- logger .warning (f"CodeQL call-graph extraction failed: { exc } " )
555+ pycg = PyCG (self .project_dir , skip_tests = self .skip_tests )
556+ return pycg .build_call_graph_edges (symbol_table )
557+ except PyCGExceptions .PyCGImportError as exc :
558+ logger .warning (f"PyCG not installed — level 2 edges will be Jedi-only: { exc } " )
559+ return []
560+ except PyCGExceptions .PyCGAnalysisError as exc :
561+ logger .warning (f"PyCG analysis failed — level 2 edges will be Jedi-only: { exc } " )
720562 return []
0 commit comments