Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -205,3 +205,5 @@ cython_debug/
marimo/_static/
marimo/_lsp/
__marimo__/
/populate/.dbeaver/
/populate/.project
1,071 changes: 1,071 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "welearn-database"
version = "1.4.2"
version = "1.4.3"
description = "All stuff related to relational database from the WeLearn project"
authors = [
{name = "Théo",email = "theo.nardin@cri-paris.org"}
Expand All @@ -18,10 +18,10 @@ dependencies = [
[tool.poetry]

[tool.poetry.group.dev.dependencies]
mypy = "^1.16.0"
bandit = "^1.8.6"
isort = "^6.1.0"
black = "^25.9.0"
mypy = "^2.1.0"
bandit = "^1.9.4"
isort = "^8.0.1"
black = "26.3.1"

[build-system]
requires = ["poetry-core>=2.0.0,<3.0.0"]
Expand Down
62 changes: 53 additions & 9 deletions tests/test_document_related.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
ProcessState,
WeLearnDocument,
)
from welearn_database.exceptions import InvalidURLScheme
from welearn_database.exceptions import ContentIsTooShort, InvalidDOI, InvalidURLScheme


class TestWeLearnDocument(TestCase):
Expand Down Expand Up @@ -72,7 +72,7 @@ def test_validate_full_content(self):
)

def test_validate_too_short_full_content(self):
with self.assertRaises(ValueError):
with self.assertRaises(ContentIsTooShort):
WeLearnDocument(
title="Test Document",
url="https://example.com/test-document",
Expand All @@ -99,6 +99,52 @@ def test_full_content(self):
"This is a test document, used for unit testing, please ignore. Thank you!",
)

def test_doi(self):
    """The DOI given to the constructor is exposed unchanged on ``doi``."""
    expected_doi = "10.1000/xyz123"
    document_fields = {
        "title": "Test Document",
        "url": "https://example.com/test-document",
        "full_content": "This is a test document, used for unit testing, please ignore. Thank you!",
        "description": "A short description of the test document.",
        "lang": "en",
        "corpus": "Test Corpus",
        "details": {"author": "Test Author", "doi": expected_doi},
        "doi": expected_doi,
    }

    document = WeLearnDocument(**document_fields)

    self.assertEqual(document.doi, expected_doi)

def test_invalid_doi(self):
    """Building a document with the DOI below must raise ``InvalidDOI``."""
    # NOTE(review): presumably rejected because the prefix is "11." rather
    # than "10." -- confirm against the DOI validator.
    rejected_doi = "11.1590/s0100-879x2002000500007"
    document_fields = {
        "title": "Test Document",
        "url": "https://example.com/test-document",
        "full_content": "This is a test document, used for unit testing, please ignore. Thank you!",
        "description": "A short description of the test document.",
        "lang": "en",
        "corpus": "Test Corpus",
        "details": {"author": "Test Author", "doi": rejected_doi},
        "doi": rejected_doi,
    }

    with self.assertRaises(InvalidDOI):
        WeLearnDocument(**document_fields)

def test_unclean_doi(self):
    """A DOI supplied as a full ``https://doi.org/`` URL must raise ``InvalidDOI``."""
    url_style_doi = "https://doi.org/10.1000/xyz123"
    document_fields = {
        "title": "Test Document",
        "url": "https://example.com/test-document",
        "full_content": "This is a test document, used for unit testing, please ignore. Thank you!",
        "description": "A short description of the test document.",
        "lang": "en",
        "corpus": "Test Corpus",
        "details": {"author": "Test Author", "doi": url_style_doi},
        "doi": url_style_doi,
    }

    with self.assertRaises(InvalidDOI):
        WeLearnDocument(**document_fields)

def test_description(self):
test_doc = WeLearnDocument(
title="Test Document",
Expand Down Expand Up @@ -284,7 +330,9 @@ def test_none_trace_in_db(self):
.first()
)
self.assertIsNotNone(doc_from_db)
self.assertEqual(doc_from_db.trace, None)
self.assertIsNone(
doc_from_db.trace,
)

def test_view_qty_document(self):
engine = create_engine("sqlite://")
Expand Down Expand Up @@ -335,9 +383,7 @@ def test_view_qty_document(self):
test_session.execute(text("DROP TABLE IF EXISTS qty_document_in_qdrant"))
test_session.commit()

test_session.execute(
text(
"""
test_session.execute(text("""
CREATE VIEW document_related.qty_document_in_qdrant AS
SELECT COUNT(1) AS document_in_qdrant
FROM (
Expand All @@ -346,9 +392,7 @@ def test_view_qty_document(self):
WHERE title = 'document_in_qdrant'
GROUP BY document_id
) latest;
"""
)
)
"""))
test_session.commit()

result = test_session.query(QtyDocumentInQdrant).first()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""add doi column to document

Revision ID: f1ce0ad2845b
Revises: b049924f7067
Create Date: 2026-04-29 15:39:51.079086

"""

from typing import Sequence, Union

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision: str = "f1ce0ad2845b"
down_revision: Union[str, None] = "b049924f7067"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Add the nullable ``doi`` column and its unique constraint.

    The column is added to ``document_related.welearn_document``. Alembic's
    ``op.add_column`` ignores ``unique=True`` on the column it receives, so
    the uniqueness of ``doi`` is created explicitly as a named constraint.
    """
    op.add_column(
        "welearn_document",
        sa.Column("doi", sa.String(), nullable=True),
        schema="document_related",
    )
    # Explicitly named so that downgrade() can drop it deterministically.
    op.create_unique_constraint(
        "welearn_document_doi_key",
        "welearn_document",
        ["doi"],
        schema="document_related",
    )


def downgrade() -> None:
    """Drop the ``doi`` unique constraint, then the ``doi`` column itself."""
    # Drop the constraint first: not every backend removes it implicitly
    # when the column it covers is dropped.
    op.drop_constraint(
        "welearn_document_doi_key",
        "welearn_document",
        schema="document_related",
        type_="unique",
    )
    op.drop_column("welearn_document", "doi", schema="document_related")
Loading
Loading