diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..cc9da72 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,44 @@ +name: publish + +# Publishes the package to PyPI when a GitHub Release is published. +# Uses PyPI Trusted Publishing (OIDC) — no API token is stored in the repo. +# The matching publisher must be configured on PyPI for project "filtersam-tools" +# with this workflow filename (publish.yml) and environment name (pypi). + +on: + release: + types: [published] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Build sdist and wheel + run: | + python -m pip install --upgrade build + python -m build + + - uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/ + + publish: + needs: build + runs-on: ubuntu-latest + environment: pypi + permissions: + id-token: write # required for Trusted Publishing + steps: + - uses: actions/download-artifact@v4 + with: + name: dist + path: dist/ + + - uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c5f508a..75a701c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -23,7 +23,11 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install pysam numpy pytest parallelbam + python -m pip install pysam numpy pytest + # parallelbam-tools>=0.0.20 (sort_by_name support) is not on PyPI yet; + # install from the repo until it is released, then switch to: + # python -m pip install "parallelbam-tools>=0.0.20" + python -m pip install "git+https://github.com/Robaina/parallelBAM.git@main" python -m pip install -e . --no-deps - name: Run tests diff --git a/filtersam/filtersam.py b/filtersam/filtersam.py index ed817fb..db64c21 100644 --- a/filtersam/filtersam.py +++ b/filtersam/filtersam.py @@ -173,6 +173,10 @@ def filterSAM(input_path: Path, output_path: Path = None, print('Converting sam file to bam for processing') input_path = Path(input_path.as_posix().replace('.sam', '.bam')) sam2bam(input_path) + # Both filters evaluate each segment independently, so the chunks do + # not need reads grouped by query name. Skipping parallelbam's + # name-sort avoids a full serial pass over the file (see #3). parallelizeBAMoperation(path_to_bam=input_path.as_posix(), callback=filter_method, callback_additional_args=[cutoff], - n_processes=n_processes, output_path=output_path.as_posix()) + n_processes=n_processes, output_path=output_path.as_posix(), + sort_by_name=False) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4a85092 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=61", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt index d3f7e3d..82aa511 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ numpy==1.21.2 pysam==0.16.0.1 -parallelbam==0.0.12 +parallelbam-tools>=0.0.20 diff --git a/setup.py b/setup.py index 619339b..fba8cee 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ DESCRIPTION = 'Tools to filter sam o bam files by percent identity or percent of matched sequence' LONG_DESCRIPTION = long_description LONG_DESCRIPTION_CONTENT_TYPE = 'text/markdown' -NAME = 'filtersam' +NAME = 'filtersam-tools' AUTHOR = "Semidán Robaina Estévez, 2021-2022" AUTHOR_EMAIL = "srobaina@ull.edu.es" MAINTAINER = "Semidán Robaina Estévez" @@ -32,7 +32,7 @@ download_url=DOWNLOAD_URL, license=LICENSE, packages=find_packages(), - install_requires=['numpy', 'pysam', 'parallelbam'], + install_requires=['numpy', 'pysam', 'parallelbam-tools>=0.0.20'], entry_points ={ 'console_scripts': [ 'filtersam = filtersam.cli:main'