Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Removes the license column from public.repositories.
-- NOTE(review): appears to be the rollback counterpart of the migration that
-- adds the column; confirm against the migration file names.
ALTER TABLE public.repositories DROP COLUMN license;
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Adds a nullable license column (up to 255 characters) to public.repositories.
-- NOTE(review): presumably holds the SPDX identifier detected for the
-- repository; confirm against the code that writes it.
ALTER TABLE public.repositories ADD COLUMN license VARCHAR(255);
13 changes: 10 additions & 3 deletions scripts/services/docker/Dockerfile.git_integration
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,17 @@ RUN apt-get update && apt-get install -y \
ca-certificates \
git \
ripgrep \
ruby \
libgit2-1.1 \
ruby-dev \
build-essential \
libgit2-dev \
cmake \
pkg-config \
--no-install-recommends \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean \
&& apt-get autoremove -y
&& gem install licensee -v '9.15.3' --no-document \
&& apt-get remove --autoremove -y ruby-dev build-essential libgit2-dev cmake pkg-config \
&& rm -rf /var/lib/apt/lists/*

ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
Expand Down
11 changes: 11 additions & 0 deletions services/apps/git_integration/src/crowdgit/database/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,17 @@ async def update_last_processed_commit(repo_id: str, commit_hash: str, branch: s
return str(result)


async def update_repository_license(repository_id: str, license_spdx: str | None) -> None:
    """Store the given license identifier on a repository row.

    Runs a single UPDATE against ``public.repositories`` that writes
    ``license`` and refreshes ``"updatedAt"``. The ``IS DISTINCT FROM``
    predicate is a NULL-safe inequality, so the row (and its timestamp) is
    only modified when the stored value actually differs from
    ``license_spdx``.

    Args:
        repository_id: Value matched against the ``id`` column.
        license_spdx: License identifier to write. ``None`` is passed through
            to the driver unchanged (presumably stored as SQL NULL).
    """
    statement = """
    UPDATE public.repositories
    SET license = $1::varchar,
        "updatedAt" = NOW()
    WHERE id = $2
      AND license IS DISTINCT FROM $1::varchar
    """
    # Positional order must line up with the $1 / $2 placeholders above.
    params = (license_spdx, repository_id)
    await execute(statement, params)
Comment thread
cursor[bot] marked this conversation as resolved.


async def mark_repo_as_processed(repo_id: str, repo_state: RepositoryState):
sql_query = """
UPDATE git."repositoryProcessing"
Expand Down
3 changes: 3 additions & 0 deletions services/apps/git_integration/src/crowdgit/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from crowdgit.services import (
CloneService,
CommitService,
LicenseService,
MaintainerService,
QueueService,
SoftwareValueService,
Expand All @@ -28,6 +29,7 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]:
software_value_service = SoftwareValueService()
vulnerability_scanner_service = VulnerabilityScannerService()
maintainer_service = MaintainerService()
license_service = LicenseService()

worker_task = None
worker = RepositoryWorker(
Expand All @@ -36,6 +38,7 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]:
software_value_service=software_value_service,
vulnerability_scanner_service=vulnerability_scanner_service,
maintainer_service=maintainer_service,
license_service=license_service,
queue_service=queue_service,
)
logger.info("Repo worker initialized")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from crowdgit.services.base.base_service import BaseService
from crowdgit.services.clone.clone_service import CloneService
from crowdgit.services.commit.commit_service import CommitService
from crowdgit.services.license.license_service import LicenseService
from crowdgit.services.maintainer.maintainer_service import MaintainerService
from crowdgit.services.queue.queue_service import QueueService
from crowdgit.services.software_value.software_value_service import SoftwareValueService
Expand All @@ -12,6 +13,7 @@
"BaseService",
"CloneService",
"CommitService",
"LicenseService",
"SoftwareValueService",
"VulnerabilityScannerService",
"MaintainerService",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from crowdgit.services.license.license_service import LicenseService

__all__ = ["LicenseService"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import json

from crowdgit.errors import CommandExecutionError, CommandTimeoutError
from crowdgit.services.base.base_service import BaseService
from crowdgit.services.utils import run_shell_command


class LicenseService(BaseService):
    """Resolve the SPDX license of a cloned repository via the ``licensee`` CLI."""

    async def detect(self, repo_path: str) -> str | None:
        """Return the SPDX identifier ``licensee`` reports for ``repo_path``.

        The method is best-effort: every failure while running the command or
        reading its JSON output is logged and reported to the caller as
        ``None`` rather than raised.

        Args:
            repo_path: Path handed to ``licensee detect`` as its target.

        Returns:
            The ``spdx_id`` of the first entry in the report's ``licenses``
            list, or ``None`` when the list is empty/missing, the command
            fails or times out, or the output cannot be parsed.
        """
        command = ["licensee", "detect", "--json", repo_path]

        try:
            raw_output = await run_shell_command(command, timeout=60)
        except CommandExecutionError:
            # A failed command is reported as "no license", at info level only.
            self.logger.info(f"licensee found no license in {repo_path}")
            return None
        except CommandTimeoutError as exc:
            self.logger.warning(f"licensee timed out: {exc!r}")
            return None
        except FileNotFoundError as exc:
            self.logger.warning(f"licensee binary not found in PATH: {exc!r}")
            return None
        except Exception as exc:
            # Catch-all so license detection can never abort the caller.
            self.logger.warning(f"licensee failed: {exc!r}")
            return None

        try:
            report = json.loads(raw_output)
            detected = report.get("licenses") or []
            files = report.get("matched_files") or []

            spdx_id = None
            if detected:
                spdx_id = detected[0].get("spdx_id")

            # Confidence is informational only; it is read from the first
            # matched file and used solely in the log line below.
            confidence = None
            if files:
                matcher = files[0].get("matcher") or {}
                confidence = matcher.get("confidence")

            if not spdx_id:
                self.logger.info(f"No SPDX license matched in {repo_path}")
            else:
                self.logger.info(
                    f"License detected: {spdx_id} (confidence={confidence}) in {repo_path}"
                )
            return spdx_id
        except Exception as exc:
            # Malformed or unexpectedly shaped JSON ends up here.
            self.logger.warning(f"Failed to parse licensee output: {exc!r}")
            return None
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
mark_repo_as_processed,
release_repo,
update_last_processed_commit,
update_repository_license,
)
from crowdgit.enums import RepositoryState
from crowdgit.errors import (
Expand All @@ -22,6 +23,7 @@
from crowdgit.services import (
CloneService,
CommitService,
LicenseService,
MaintainerService,
QueueService,
SoftwareValueService,
Expand All @@ -46,13 +48,15 @@ def __init__(
software_value_service: SoftwareValueService,
vulnerability_scanner_service: VulnerabilityScannerService,
maintainer_service: MaintainerService,
license_service: LicenseService,
queue_service: QueueService,
Comment thread
gaspergrom marked this conversation as resolved.
):
self.clone_service = clone_service
self.commit_service = commit_service
self.software_value_service = software_value_service
self.vulnerability_scanner_service = vulnerability_scanner_service
self.maintainer_service = maintainer_service
self.license_service = license_service
self.queue_service = queue_service
self._shutdown = False

Expand Down Expand Up @@ -159,6 +163,7 @@ def _bind_repository_context(self, repository: Repository, repo_name: str) -> No
(self.maintainer_service, "maintainer_processing"),
(self.software_value_service, "software_value_processing"),
(self.vulnerability_scanner_service, "vulnerability_scan_processing"),
(self.license_service, "license_detection"),
(self.queue_service, "queue_service"),
]

Expand All @@ -174,6 +179,7 @@ def _reset_all_contexts(self) -> None:
self.maintainer_service,
self.software_value_service,
self.vulnerability_scanner_service,
self.license_service,
self.queue_service,
]

Expand Down Expand Up @@ -236,6 +242,8 @@ async def _process_single_repository(self, repository: Repository):
repository.id, batch_info.repo_path, repository.url
)
await self.maintainer_service.process_maintainers(repository, batch_info)
license_spdx = await self.license_service.detect(batch_info.repo_path)
Comment thread
cursor[bot] marked this conversation as resolved.
await update_repository_license(repository.id, license_spdx)
Comment thread
gaspergrom marked this conversation as resolved.
Comment thread
gaspergrom marked this conversation as resolved.
await self.commit_service.process_single_batch_commits(
repository,
batch_info,
Expand Down
45 changes: 17 additions & 28 deletions services/apps/members_enrichment_worker/src/activities/member.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,8 @@ export async function getIdentitiesExistInOtherMembers(
excludeMemberId: string,
identities: IMemberIdentity[],
): Promise<IMemberIdentity[]> {
let rows: IMemberIdentity[] = []

try {
const db = svc.postgres.reader
rows = await getIdentitiesExistInOthers(db, excludeMemberId, identities)
} catch (err) {
throw err
}

const db = svc.postgres.reader
const rows = await getIdentitiesExistInOthers(db, excludeMemberId, identities)
return rows
}

Expand All @@ -94,25 +87,21 @@ export async function updateMemberWithEnrichmentData(
identities: IMemberIdentity[],
attributes?: IAttributes,
): Promise<void> {
try {
await svc.postgres.writer.connection().tx(async (tx) => {
for (const identity of identities) {
await createMemberIdentity(new PgPromiseQueryExecutor(tx), {
memberId,
platform: identity.platform,
value: identity.value,
type: identity.type,
verified: identity.verified || false,
source: 'enrichment',
})
}
if (attributes) {
await updateMemberAttributes(tx, memberId, attributes)
}
})
} catch (err) {
throw err
}
await svc.postgres.writer.connection().tx(async (tx) => {
for (const identity of identities) {
await createMemberIdentity(new PgPromiseQueryExecutor(tx), {
memberId,
platform: identity.platform,
value: identity.value,
type: identity.type,
verified: identity.verified || false,
source: 'enrichment',
})
}
if (attributes) {
await updateMemberAttributes(tx, memberId, attributes)
}
})
}

export async function mergeMembers(
Expand Down
7 changes: 5 additions & 2 deletions services/libs/data-access-layer/src/repositories/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export interface IRepository {
updatedAt: string
deletedAt: string | null
lastArchivedCheckAt: string | null
license: string | null
Comment thread
cursor[bot] marked this conversation as resolved.
}

export interface ICreateRepository {
Expand Down Expand Up @@ -148,7 +149,8 @@ export async function getRepositoriesBySourceIntegrationId(
"createdAt",
"updatedAt",
"deletedAt",
"lastArchivedCheckAt"
"lastArchivedCheckAt",
license
FROM public.repositories
WHERE "sourceIntegrationId" = $(sourceIntegrationId)
AND "deletedAt" IS NULL
Expand Down Expand Up @@ -190,7 +192,8 @@ export async function getRepositoriesByUrl(
"createdAt",
"updatedAt",
"deletedAt",
"lastArchivedCheckAt"
"lastArchivedCheckAt",
license
FROM public.repositories
WHERE url IN ($(repoUrls:csv))
${deletedFilter}
Expand Down
Loading