From 3d46a8102765e2e0c1dda34f0736772ccf4830d4 Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Fri, 15 May 2026 19:47:10 +0900 Subject: [PATCH 1/2] feat: support pyproject.toml license extraction in manifest flow Signed-off-by: Wonjae Park --- pyproject.toml | 1 + src/fosslight_source/_scan_item.py | 1 + .../run_manifest_extractor.py | 63 +++++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index f10aa8b..6cbdccd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ dependencies = [ "normality==2.6.1", # Python 3.13+ needs psycopg2-binary 2.9.10+ (has wheels; 2.9.9 builds fail with _PyInterpreterState_Get) "psycopg2-binary>=2.9.10; python_version >= '3.13'", + "tomli; python_version < '3.11'", "tqdm", ] diff --git a/src/fosslight_source/_scan_item.py b/src/fosslight_source/_scan_item.py index 591a9b6..b0a742d 100644 --- a/src/fosslight_source/_scan_item.py +++ b/src/fosslight_source/_scan_item.py @@ -24,6 +24,7 @@ r'package\.json$', r'setup\.py$', r'setup\.cfg$', + r'pyproject\.toml$', r'.*\.podspec$', r'Cargo\.toml$', r'huggingface_hub_metadata\.json$', diff --git a/src/fosslight_source/run_manifest_extractor.py b/src/fosslight_source/run_manifest_extractor.py index b9efd2f..e68f8b5 100644 --- a/src/fosslight_source/run_manifest_extractor.py +++ b/src/fosslight_source/run_manifest_extractor.py @@ -125,6 +125,63 @@ def get_licenses_from_setup_py(file_path: str) -> list[str]: return _split_spdx_expression(value) +def get_licenses_from_pyproject_toml(file_path: str) -> list[str]: + try: + data = None + try: + import tomllib as toml_loader # Python 3.11+ + with open(file_path, 'rb') as f: + data = toml_loader.load(f) + except Exception: + try: + import tomli as toml_loader # Backport + with open(file_path, 'rb') as f: + data = toml_loader.load(f) + except Exception: + data = None + + if isinstance(data, dict): + project_tbl = data.get('project') or {} + license_value = project_tbl.get('license') + if 
isinstance(license_value, str) and license_value.strip(): + return _split_spdx_expression(license_value.strip()) + if isinstance(license_value, dict): + text_value = license_value.get('text') + if isinstance(text_value, str) and text_value.strip(): + return _split_spdx_expression(text_value.strip()) + if license_value.get('file'): + return [] + except Exception as ex: + logger.info(f"Failed to parse pyproject.toml via toml parser for {file_path}: {ex}") + + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + project_match = re.search(r'^\s*\[project\]\s*(.*?)(?=^\s*\[|\Z)', content, flags=re.MULTILINE | re.DOTALL) + if not project_match: + return [] + block = project_match.group(1) + m = re.search(r'^\s*license\s*=\s*(?P<q>"""|\'\'\'|"|\')(?P<val>.*?)(?P=q)', block, + flags=re.MULTILINE | re.DOTALL) + if m: + val = m.group('val').strip() + if val: + return _split_spdx_expression(val) + m2 = re.search(r'^\s*license\s*=\s*\{[^}]*?\btext\s*=\s*(?P<q>"""|\'\'\'|"|\')(?P<val>.*?)(?P=q)', + block, flags=re.MULTILINE | re.DOTALL) + if m2: + val = m2.group('val').strip() + if val: + return _split_spdx_expression(val) + m3 = re.search(r'^\s*license\s*=\s*\{[^}]*?\bfile\s*=', block, flags=re.MULTILINE | re.DOTALL) + if m3: + return [] + except Exception as ex: + logger.info(f"Failed to parse pyproject.toml {file_path}: {ex}") + return [] + return [] + + def get_licenses_from_podspec(file_path: str) -> list[str]: try: with open(file_path, 'r', encoding='utf-8') as f: @@ -278,6 +335,12 @@ def get_manifest_licenses(file_path: str) -> list[str]: except Exception as ex: logger.info(f"Failed to extract license from setup.py {file_path}: {ex}") return [] + elif os.path.basename(file_path).lower() == 'pyproject.toml': + try: + return get_licenses_from_pyproject_toml(file_path) + except Exception as ex: + logger.info(f"Failed to extract license from pyproject.toml {file_path}: {ex}") + return [] elif os.path.basename(file_path).lower().endswith('.podspec'): try: return 
get_licenses_from_podspec(file_path) From 18e9054ac6c690455083a1fb56ed932174a2b413 Mon Sep 17 00:00:00 2001 From: Wonjae Park Date: Wed, 20 May 2026 14:57:55 +0900 Subject: [PATCH 2/2] fix: preserve pyproject license expressions Signed-off-by: Wonjae Park --- src/fosslight_source/run_manifest_extractor.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fosslight_source/run_manifest_extractor.py b/src/fosslight_source/run_manifest_extractor.py index e68f8b5..9c03fb1 100644 --- a/src/fosslight_source/run_manifest_extractor.py +++ b/src/fosslight_source/run_manifest_extractor.py @@ -144,11 +144,11 @@ def get_licenses_from_pyproject_toml(file_path: str) -> list[str]: project_tbl = data.get('project') or {} license_value = project_tbl.get('license') if isinstance(license_value, str) and license_value.strip(): - return _split_spdx_expression(license_value.strip()) + return [license_value.strip()] if isinstance(license_value, dict): text_value = license_value.get('text') if isinstance(text_value, str) and text_value.strip(): - return _split_spdx_expression(text_value.strip()) + return [text_value.strip()] if license_value.get('file'): return [] except Exception as ex: @@ -166,13 +166,13 @@ def get_licenses_from_pyproject_toml(file_path: str) -> list[str]: if m: val = m.group('val').strip() if val: - return _split_spdx_expression(val) + return [val] m2 = re.search(r'^\s*license\s*=\s*\{[^}]*?\btext\s*=\s*(?P<q>"""|\'\'\'|"|\')(?P<val>.*?)(?P=q)', block, flags=re.MULTILINE | re.DOTALL) if m2: val = m2.group('val').strip() if val: - return _split_spdx_expression(val) + return [val] m3 = re.search(r'^\s*license\s*=\s*\{[^}]*?\bfile\s*=', block, flags=re.MULTILINE | re.DOTALL) if m3: return []