From 817add9ba3cad261e919ee25991a5b5574151e3f Mon Sep 17 00:00:00 2001
From: Roberto Polli <robipolli@gmail.com>
Date: Thu, 18 Jun 2026 13:41:28 +0200
Subject: [PATCH] Reorganize pre-commit hooks. See #46

---
 .bandit.yaml                    |  14 +--
 .isort.cfg                      |   5 --
 .pre-commit-config.yaml         | 149 +++++++++++++++++++++-----------
 .yamlfmt                        |  16 ++++
 README.md                       |  10 +--
 TESTING.md                      |   1 -
 docs/adr/0001-use-adr.md        |  43 +++++++++
 docs/adr/0002-use-pre-commit.md |  49 +++++++++++
 docs/doc-style.instructions.md  |  73 ++++++++++++++++
 ruff.toml                       |   7 ++
 tox.ini                         |  22 +----
 11 files changed, 302 insertions(+), 87 deletions(-)
 delete mode 100644 .isort.cfg
 create mode 100644 .yamlfmt
 create mode 100644 docs/adr/0001-use-adr.md
 create mode 100644 docs/adr/0002-use-pre-commit.md
 create mode 100644 docs/doc-style.instructions.md
 create mode 100644 ruff.toml

diff --git a/.bandit.yaml b/.bandit.yaml
index 544f267..7a60bf8 100644
--- a/.bandit.yaml
+++ b/.bandit.yaml
@@ -6,13 +6,13 @@
 # and you are certain that this is acceptable, they can be individually
 # silenced by appending # nosec to the line:
 exclude_dirs:
- - .tox
- - .git
- - .mypy_cache
- - .pytest_cache
- - .github
- - venv
- - tests
+- .tox
+- .git
+- .mypy_cache
+- .pytest_cache
+- .github
+- venv
+- tests
 
 # Skip assert inside test files.
 assert_used:
diff --git a/.isort.cfg b/.isort.cfg
deleted file mode 100644
index d82f21e..0000000
--- a/.isort.cfg
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Align isort profile with black.
-#
-[tool.isort]
-profile = "black"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 458ea36..948a7fb 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,3 +1,4 @@
+---
 #
 # Run pre-commit hooks. You can run them without installing
 #  the hook with
@@ -6,59 +7,109 @@
 #
 # See https://pre-commit.com for more information
 # See https://pre-commit.com/hooks.html for more hooks
+#
+# Note: Use docker images with specific digests to ensure reproducibility and security.
+# Fast checks run on commit; heavy/security scans are deferred to pre-push.
+default_stages:
+- pre-commit
+- manual
+- pre-push
 repos:
--   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v5.0.0
-    hooks:
-    -   id: trailing-whitespace
-    -   id: end-of-file-fixer
-    -   id: check-xml
-    -   id: detect-private-key
-    -   id: check-yaml
-        args: [--allow-multiple-documents]
-    -   id: check-added-large-files
-- repo: https://github.com/myint/autoflake
-  rev: v2.3.1
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v6.0.0
   hooks:
-    - id: autoflake
-      args:
-        - --in-place
-        - --remove-unused-variables
-        - --remove-all-unused-imports
--   repo: https://github.com/psf/black
-    rev: 24.10.0
-    hooks:
-    -   id: black
-- repo: https://github.com/pycqa/isort
-  rev: 5.13.2
+  # Manage spaces.
+  - id: trailing-whitespace
+  - id: end-of-file-fixer
+  - id: check-added-large-files
+  - id: check-symlinks
+  - id: mixed-line-ending
+    args: [--fix=lf]
+  # Check file syntax/format.
+  - id: check-xml
+  - id: check-json
+    exclude: '.devcontainer/.*'  # devcontainer json files can contain comments.
+  - id: check-yaml
+    args: [--allow-multiple-documents]
+  # Security checks.
+  - id: detect-private-key
+  - id: detect-aws-credentials
+    args:
+    # See https://github.com/pre-commit/pre-commit-hooks/issues/174
+    - --allow-missing-credentials
+#
+# Python: lint, format, type-check, security, dependency audit.
+#
+- repo: https://github.com/astral-sh/ruff-pre-commit  # replaces autoflake, black, isort, flake8
+  rev: v0.15.4
   hooks:
-    - id: isort
-      name: isort (python)
-      # Use black profile for isort to avoid conflicts
-      #   see https://github.com/PyCQA/isort/issues/1518
-      args: ["--profile", "black"]
-    - id: isort
-      name: isort (cython)
-      types: [cython]
-    - id: isort
-      name: isort (pyi)
-      types: [pyi]
-- repo: https://github.com/PyCQA/flake8
-  rev: 7.1.1
+  - id: ruff
+    args: [--fix]
+  - id: ruff-format
+- repo: https://github.com/pre-commit/mirrors-mypy
+  rev: v1.18.2
   hooks:
-  - id: flake8
-- repo: https://github.com/PyCQA/bandit
+  - id: mypy
+- repo: https://github.com/PyCQA/bandit  # kept standalone: shared with CI + .bandit.yaml
   rev: 1.7.10
   hooks:
-    - id: bandit
-      name: bandit
-      args: ["-c", ".bandit.yaml"]
-      description: 'Bandit is a tool for finding common security issues in Python code'
-      entry: bandit
-      language: python
-      language_version: python3
-      types: [python]
-- repo: https://github.com/Lucas-C/pre-commit-hooks-safety
-  rev: v1.3.3
+  - id: bandit
+    name: bandit
+    args: ["-c", ".bandit.yaml"]
+    description: 'Bandit is a tool for finding common security
+      issues in Python code'
+    entry: bandit
+    language: python
+    language_version: python3
+    types: [python]
+- repo: https://github.com/pypa/pip-audit  # replaces gated safety; free OSV-backed scan
+  rev: v2.7.3
+  hooks:
+  - id: pip-audit
+    args: ["-r", "requirements.txt", "-r", "requirements-dev.txt"]
+    stages: ["pre-push"]  # can be slow: run only on push
+#
+# Document and data formatting.
+#
+- repo: https://github.com/google/yamlfmt
+  rev: v0.21.0
+  hooks:
+  - id: yamlfmt
+    files: \.(yaml|yml)$
+- repo: https://github.com/executablebooks/mdformat
+  rev: 0.7.17
+  hooks:
+  - id: mdformat
+    exclude: '.github/.*|.gitlab/.*'  # leave issue/PR templates untouched
+    additional_dependencies:
+    - mdformat-gfm  # GFM syntax; pulls in mdformat-tables as a dependency
+    - mdformat-frontmatter  # keep YAML front matter intact instead of rewriting it
+#
+# Security linters (checkov and TruffleHog are deferred to pre-push).
+#
+- repo: https://github.com/bridgecrewio/checkov.git
+  rev: 3.2.507  # pinned release tag
+  hooks:
+  - id: checkov
+    stages: ["pre-push"]
+    entry: >-
+      checkov
+
+        -d .
+  - id: checkov_secrets
+    stages: ["pre-push"]
+- repo: https://github.com/zizmorcore/zizmor-pre-commit
+  rev: v1.22.0
+  hooks:
+  - id: zizmor
+- repo: local
   hooks:
-    - id: python-safety-dependencies-check
+  - id: trufflehog
+    name: TruffleHog v3.93.3
+    description: Detect secrets in your data.
+    entry: bash -c 'docker run --cpus 2 --rm -v "$(pwd):/workdir"
+      -i --rm docker.io/trufflesecurity/trufflehog@sha256:06c1f230512cbb694954716fa5e0adbfb95809c7bfb5a50c25110847417b69db
+      git file:///workdir --branch HEAD --results=verified,unknown
+      --fail'
+    language: system
+    stages: ["pre-push"]
diff --git a/.yamlfmt b/.yamlfmt
new file mode 100644
index 0000000..338ff70
--- /dev/null
+++ b/.yamlfmt
@@ -0,0 +1,16 @@
+#
+# yamlfmt used by .pre-commit-config.yaml
+#
+gitignore_excludes: true
+formatter:
+  pad_line_comments: 2
+  indentless_arrays: true
+  # Line formatting.
+  retain_line_breaks: true
+  retain_line_breaks_single: true
+  scan_folded_as_literal: true
+  max_line_length: 60
+  # Interoperability.
+  drop_merge_tag: true
+  # Retain document markers.
+  include_document_start: true
diff --git a/README.md b/README.md
index 56b9d40..e064c83 100644
--- a/README.md
+++ b/README.md
@@ -45,8 +45,8 @@ Installing `act` is beyond the scope of this document.
 To test the pipeline locally and ensure that secrets (e.g., service accounts and other credentials)
 are correctly configured, use:
 
- ```bash
- # Run a specific job in the pipeline
- act -j test -s CI_API_TOKEN="$(cat gh-ci.json)" \
-      -s CI_ACCOUNT=my-secret-account
- ```
+```bash
+# Run a specific job in the pipeline
+act -j test -s CI_API_TOKEN="$(cat gh-ci.json)" \
+     -s CI_ACCOUNT=my-secret-account
+```
diff --git a/TESTING.md b/TESTING.md
index 1a1ee8f..98d25fc 100644
--- a/TESTING.md
+++ b/TESTING.md
@@ -6,7 +6,6 @@ which components should be verified during the development phase?
 Project tend to grow complex and it is not always easy to understand which components are involved in the test.
 For example, a modern application relies on many moving parts, including container orchestrators, API gateways, datastores & queues, credentials management, and so on.
 
-
 ```mermaid
 graph LR
 
diff --git a/docs/adr/0001-use-adr.md b/docs/adr/0001-use-adr.md
new file mode 100644
index 0000000..0cb7b00
--- /dev/null
+++ b/docs/adr/0001-use-adr.md
@@ -0,0 +1,43 @@
+# 1. Record architecture decisions
+
+<!-- In vim, use !!date -I to get current date. -->
+
+Date: 2025-12-03
+
+## Status
+
+<!-- Proposed, Accepted, Deprecated, Superseded, or Rejected -->
+
+Accepted
+
+## Context
+
+We need to record the architectural decisions made on this project.
+ADRs should stay at the design/decision level,
+without specific implementation details, code snippets
+or step-by-step procedures.
+
+This ADR is the template to use for all the other ADRs:
+customize this file to ensure it fits your needs and
+ensure that all the ADRs follow the same format.
+
+The context provides the basic information that leads
+to the decision, and may include the alternatives considered.
+
+## Decision
+
+- [ ] We will use Architecture Decision Records, as [described by Michael Nygard](http://thinkrelevance.com/blog/2011/11/15/documenting-architecture-decisions).
+- [ ] Decision is a list of checkboxes, to be marked as the decision is implemented.
+- [ ] Avoid adding specific implementation details, code snippets or step-by-step procedures.
+- [ ] The Consequences section contains the pros and cons of the decision,
+  in the form of a bullet list.
+
+## Consequences
+
+Pros:
+
+- See Michael Nygard's article, linked above. For a lightweight ADR toolset, see Nat Pryce's [adr-tools](https://github.com/npryce/adr-tools).
+
+Cons:
+
+- Requires discipline to keep the ADRs up to date and ensure they are consulted when making decisions.
diff --git a/docs/adr/0002-use-pre-commit.md b/docs/adr/0002-use-pre-commit.md
new file mode 100644
index 0000000..20291c1
--- /dev/null
+++ b/docs/adr/0002-use-pre-commit.md
@@ -0,0 +1,49 @@
+# 2. Use pre-commit for local quality gates
+
+<!-- In vim, use !!date -I to get current date. -->
+
+Date: 2026-06-18
+
+## Status
+
+<!-- Proposed, Accepted, Deprecated, Superseded, or Rejected -->
+
+Accepted
+
+## Context
+
+This template ships quality and security gates so new
+projects inherit them from day one. The same checks run
+locally and in CI, giving contributors fast feedback
+before they push.
+
+We consolidate overlapping tools and split checks by
+cost: fast checks on commit, heavier and security scans
+on push. Per-hook rationale lives as inline comments in
+`.pre-commit-config.yaml`, not in this ADR.
+
+See [pre-commit](https://pre-commit.com) for the
+framework.
+
+## Decision
+
+- [ ] Use pre-commit to orchestrate local quality and
+  security gates, mirroring CI.
+- [ ] Consolidate the Python lint and format toolchain.
+- [ ] Run heavy and security scans at push time, and
+  pin container images by digest.
+
+## Consequences
+
+Pros:
+
+- One feedback loop shared by developers and CI.
+- Fewer tools to install and keep in sync.
+- Secrets and misconfigurations are caught before they
+  reach the remote.
+
+Cons:
+
+- Some scans need Docker on the contributor machine.
+- Push-time scans add latency to `git push`.
+- Hook revisions need periodic bumping to stay current.
diff --git a/docs/doc-style.instructions.md b/docs/doc-style.instructions.md
new file mode 100644
index 0000000..ca80a06
--- /dev/null
+++ b/docs/doc-style.instructions.md
@@ -0,0 +1,73 @@
+---
+
+title: Documentation Writing Instructions
+applyTo:
+
+- docs/**/*.md
+- README*.md
+
+---
+
+## Documentation Writing Instructions
+
+### Language and Style
+
+- Use direct, concise sentences.
+- Avoid bold text; do not use `**bold**` or `__bold__`.
+- Do not use emojis.
+- Avoid unnecessary repetitions.
+- Use clear, unambiguous language.
+- Use markdown references (`[Section](#section)`) to link between document sections.
+- Always add markdown section identifiers (e.g., `## Section Name`).
+- Use consistent backticks for code, keywords, and field names (e.g., `skos:notation`, `@context`).
+- Use correct casing for acronyms and specification names (e.g., "JSON-LD", "CSV", "SKOS", "RDF").
+- Do not alter the casing of standard acronyms or specification names.
+- Numbered lists should always use `1.` for each item.
+- Lines should not exceed 60 characters when generating new content, but do not re-wrap existing lines.
+
+### Formatting
+
+- Use only standard markdown features.
+- Do not use HTML for formatting unless strictly necessary.
+- Use fenced code blocks for examples and code.
+- When showing mappings or field names, always wrap them in backticks.
+- For YAML, Turtle, or JSON examples, use the appropriate code block language identifier.
+
+### Diagrams
+
+- All diagrams must use Mermaid format
+  (fenced code block with `mermaid` language identifier).
+- Do not use ASCII art diagrams.
+- Use semantic, descriptive node IDs (not single letters).
+- Node links must explicitly describe the action
+  and nodes must explicitly describe the artifact in concise terms.
+- Use parallelogram nodes (`[/" "/]`) for
+  input and output files.
+- Use rectangle nodes (`[" "]`) for
+  actions and commands.
+- Prefix command nodes with the command name
+  followed by a colon and a short description
+  (e.g., `"csv create: serialize the JSON-LD projection to CSV"`).
+
+Example Mermaid diagram:
+
+```mermaid
+graph
+
+input-data[/"Input"/]
+my-process["Process"]
+output-data[/"Result"/]
+
+
+input-data -->|ingested by| my-process -->|produces| output-data
+
+```
+
+### Content Structure
+
+- Start with a clear title and a short introduction.
+- Use sections and subsections with clear headings.
+- Each section should have a unique markdown identifier.
+- Reference related documents using markdown links.
+- When describing processes, use step-by-step numbered lists.
+- For requirements, use bullet points or numbered lists as appropriate.
diff --git a/ruff.toml b/ruff.toml
new file mode 100644
index 0000000..e2c3b29
--- /dev/null
+++ b/ruff.toml
@@ -0,0 +1,7 @@
+# Ruff config — replaces black (format), isort (import sort), flake8, autoflake.
+line-length = 88            # matches black's default wrapping
+
+[lint]
+# Ruff's default rule set (E4, E7, E9, F) includes F401 (unused imports) + F841 (unused vars), which replace autoflake.
+extend-select = ["I"]       # import sorting (isort); black profile is ruff's default
+ignore = ["E501"]           # tolerate long lines (links) — matches old flake8 max-line-length=150 intent
diff --git a/tox.ini b/tox.ini
index dcbe1fb..b6a45c3 100644
--- a/tox.ini
+++ b/tox.ini
@@ -21,19 +21,6 @@ setenv =
 commands =
   pytest {posargs}
 
-[testenv:safety]
-# Tune up this section (e.g., if safety does not use requirement files.)
-deps =
-  -rrequirements.txt
-  -rrequirements-dev.txt
-  safety
-
-setenv =
-  PYTHONPATH=:.:
-
-commands =
-  safety check --short-report -r requirements.txt
-
 [testenv:release]
 # Release with tox via:
 #
@@ -70,10 +57,5 @@ commands =
 #
 # Tools configuration.
 #
-[flake8]
-# Ignore long lines in flake8 because
-#   they are managed by black and we
-#   want to support links.
-max-line-length = 150
-# Disable E203 because black correctly handles whitespaces before ':'.
-extend-ignore = E203
+# Lint/format settings moved to ruff.toml (ruff replaces black,
+# isort, flake8, autoflake).