From 567d2ce0bd872a3fddeb5c076a79b81028ea07b6 Mon Sep 17 00:00:00 2001 From: Stefano Braghin <527806+stefano81@users.noreply.github.com> Date: Tue, 30 Jun 2026 20:56:46 +0200 Subject: [PATCH 1/3] fix: make flair an optional dependency Signed-off-by: Stefano Braghin <527806+stefano81@users.noreply.github.com> --- pyproject.toml | 12 +++++++----- .../classification/unstructured/flair_ner.py | 13 +++++++++++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3b454ce..1e0de75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,18 +27,17 @@ classifiers = [ dependencies = [ "datasets==4.8.5", "fastparquet==2024.5.0", - "flair>=0.15.1", "google-re2==1.1.20251105", "huggingface-hub>=0.34.0,<1.0", "mecab-python3==1.0.12", "nltk==3.9.4", "opt_einsum==3.3.0", - "pandas==2.3.2", + "pandas==3.0.3", "pip>=26.0.0", "pyarrow==24.0.0", "pytz==2024.1", "safetensors==0.4.4", - "scipy==1.17.1", + "scipy>=1.17.1", "spacy==3.8.7", "SPARQLWrapper==2.0.0", "stanza==1.10.1", @@ -54,6 +53,9 @@ rest = [ "fastapi==0.136.1", "python-dotenv==1.2.2", ] +flair = [ + "flair>=0.15.1", +] dev = [ "bandit", "faker", @@ -61,8 +63,8 @@ dev = [ # "git+https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.7.3/en_core_web_trf-3.7.3-py3-none-any.whl", # "git+https://github.com/explosion/spacy-models/releases/download/ja_core_news_sm-3.7.0/ja_core_news_sm-3.7.0-py3-none-any.whl", # "git+https://github.com/explosion/spacy-models/releases/download/xx_ent_wiki_sm-3.7.0/xx_ent_wiki_sm-3.7.0-py3-none-any.whl", - "httpx==0.28.1", - "notebook==7.6.0", + "httpx", + "notebook", "pytest-faker", "pytest-mock", "ruff", diff --git a/src/risk_assessment/classification/unstructured/flair_ner.py b/src/risk_assessment/classification/unstructured/flair_ner.py index 33bb037..8ad57f7 100644 --- a/src/risk_assessment/classification/unstructured/flair_ner.py +++ b/src/risk_assessment/classification/unstructured/flair_ner.py @@ -1,7 +1,11 @@ from typing import Any -from flair.data import Sentence -from flair.models import SequenceTagger +try: + from flair.data import Sentence + from flair.models import SequenceTagger +except ImportError: + Sentence = None # type: ignore[assignment,misc] + SequenceTagger = None # type: ignore[assignment] from risk_assessment.classification.unstructured import Entity, EntityExtractor @@ -23,6 +27,10 @@ def __init__( nlp_model: Any = None, nlp_model_name: str = "spacy", ) -> None: + if SequenceTagger is None: + raise ImportError( + "The 'flair' package is required to use FLAIREntityExtractor. " "Install it with: pip install flair" + ) if type_mapping is None: type_mapping = {} super().__init__(type_mapping) @@ -40,6 +48,7 @@ def split_text_into_sentences(self, text: str) -> list[str]: return sentences def extract(self, text: str) -> list[Entity]: + assert Sentence is not None sentences = self.split_text_into_sentences(text) entities: list[Entity] = [] sentences_shift = find_sentences_shift(text, sentences) From 0e064f88458ee02b366664a43206b6f3d332e0a4 Mon Sep 17 00:00:00 2001 From: Stefano Braghin <527806+stefano81@users.noreply.github.com> Date: Wed, 1 Jul 2026 15:23:33 +0200 Subject: [PATCH 2/3] fix: make ruff happy Signed-off-by: Stefano Braghin <527806+stefano81@users.noreply.github.com> --- pyproject.toml | 14 ++++++-------- .../classification/unstructured/flair_ner.py | 2 +- .../classification/unstructured/test_stanza_ner.py | 4 ++++ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1e0de75..3d8e5e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,10 +25,9 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "datasets==4.8.5", + "datasets==5.0.0", "fastparquet==2024.5.0", "google-re2==1.1.20251105", - "huggingface-hub>=0.34.0,<1.0", "mecab-python3==1.0.12", "nltk==3.9.4", "opt_einsum==3.3.0", @@ -38,19 +37,18 @@ dependencies = [ "pytz==2024.1", "safetensors==0.4.4", "scipy>=1.17.1", - "spacy==3.8.7", + "spacy==3.8.14", "SPARQLWrapper==2.0.0", - "stanza==1.10.1", - "tika==2.6.0", - "torch~=2.6.0", - "transformers==4.57.6", + "stanza==1.13.0", + "tika==3.1.0", + "torch~=2.12.1", "unidic-lite==1.0.8", "urllib3==2.7.0", "word2number==1.1", ] [project.optional-dependencies] rest = [ - "fastapi==0.136.1", + "fastapi==0.138.2", "python-dotenv==1.2.2", ] flair = [ diff --git a/src/risk_assessment/classification/unstructured/flair_ner.py b/src/risk_assessment/classification/unstructured/flair_ner.py index 8ad57f7..a58e2e2 100644 --- a/src/risk_assessment/classification/unstructured/flair_ner.py +++ b/src/risk_assessment/classification/unstructured/flair_ner.py @@ -29,7 +29,7 @@ def __init__( ) -> None: if SequenceTagger is None: raise ImportError( - "The 'flair' package is required to use FLAIREntityExtractor. " "Install it with: pip install flair" + "The 'flair' package is required to use FLAIREntityExtractor. Install it with: pip install flair" ) if type_mapping is None: type_mapping = {} diff --git a/tests/classification/unstructured/test_stanza_ner.py b/tests/classification/unstructured/test_stanza_ner.py index 27543af..2bc45e9 100644 --- a/tests/classification/unstructured/test_stanza_ner.py +++ b/tests/classification/unstructured/test_stanza_ner.py @@ -17,4 +17,8 @@ def test_stanza_ner_extractor_basic(): assert entities assert len(entities) == 99 + print(entities) + + assert False + assert len({entity.entity_type for entity in entities}) == 11 From e2c606460896459858ca74fded6d4bcf374309e2 Mon Sep 17 00:00:00 2001 From: Stefano Braghin <527806+stefano81@users.noreply.github.com> Date: Wed, 1 Jul 2026 22:40:56 +0200 Subject: [PATCH 3/3] fix: test used for debug Signed-off-by: Stefano Braghin <527806+stefano81@users.noreply.github.com> --- tests/classification/unstructured/test_stanza_ner.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/classification/unstructured/test_stanza_ner.py b/tests/classification/unstructured/test_stanza_ner.py index 2bc45e9..27543af 100644 --- a/tests/classification/unstructured/test_stanza_ner.py +++ b/tests/classification/unstructured/test_stanza_ner.py @@ -17,8 +17,4 @@ def test_stanza_ner_extractor_basic(): assert entities assert len(entities) == 99 - print(entities) - - assert False - assert len({entity.entity_type for entity in entities}) == 11