diff --git a/config/settings/base.py b/config/settings/base.py index 6fc2f4c..a2ebbb9 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -323,8 +323,16 @@ "CORE_JOURNAL_API_ENDPOINT", default="/api/v2/pid/journal/", ) +CORE_ISSUE_API_ENDPOINT = env( + "CORE_ISSUE_API_ENDPOINT", + default="/api/v1/issue/", +) +CORE_ISSUE_FROM_DATE_CREATED = env( + "CORE_ISSUE_FROM_DATE_CREATED", + default="2019-01-01", +) CORE_COLLECTION_API_URL = f"{CORE_API_DOMAIN}{CORE_COLLECTION_API_ENDPOINT}" CORE_JOURNAL_API_URL = f"{CORE_API_DOMAIN}{CORE_JOURNAL_API_ENDPOINT}" #Aumento en el límite de campos -DATA_UPLOAD_MAX_NUMBER_FIELDS = 10000 \ No newline at end of file +DATA_UPLOAD_MAX_NUMBER_FIELDS = 10000 diff --git a/config/urls.py b/config/urls.py index cac7c09..3d74255 100644 --- a/config/urls.py +++ b/config/urls.py @@ -1,17 +1,16 @@ +from core.search import views as search_views from django.conf import settings -from django.urls import include, path -from django.contrib import admin -from django.conf.urls.static import static from django.conf.urls.i18n import i18n_patterns # ← Adicionar esta linha -from wagtail.admin import urls as wagtailadmin_urls +from django.conf.urls.static import static +from django.contrib import admin +from django.urls import include, path +from rest_framework_simplejwt.views import TokenObtainPairView, TokenRefreshView from wagtail import urls as wagtail_urls +from wagtail.admin import urls as wagtailadmin_urls from wagtail.documents import urls as wagtaildocs_urls -from wagtailautocomplete.urls.admin import urlpatterns as autocomplete_admin_urls -from rest_framework_simplejwt.views import TokenObtainPairView, TokenRefreshView -from core.search import views as search_views -from reference import views as reference_views from config import api_router as api_router +from markup_doc.autocomplete import urlpatterns as autocomplete_admin_urls urlpatterns = [ path("django-admin/", admin.site.urls), @@ -20,10 +19,12 @@ path("search/", search_views.search, name="search"), # JWT path("api/v1/auth/token/", TokenObtainPairView.as_view(), name="token_obtain_pair"), - path("api/v1/auth/token/refresh/", TokenRefreshView.as_view(), name="token_refresh"), + path( + "api/v1/auth/token/refresh/", TokenRefreshView.as_view(), name="token_refresh" + ), path("api/v1/", include(api_router)), # URL para trocar idioma - path('i18n/', include('django.conf.urls.i18n')), + path("i18n/", include("django.conf.urls.i18n")), ] + static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) # URLs com prefixo de idioma diff --git a/core/migrations/0003_coresyncstate_alter_flexibledate_id_alter_gender_id_and_more.py b/core/migrations/0003_coresyncstate_alter_flexibledate_id_alter_gender_id_and_more.py new file mode 100644 index 0000000..df45e0a --- /dev/null +++ b/core/migrations/0003_coresyncstate_alter_flexibledate_id_alter_gender_id_and_more.py @@ -0,0 +1,51 @@ +# Generated by Django 6.0.5 on 2026-05-31 17:41 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0002_wagtailsearch_indexentry_text_defaults'), + ] + + operations = [ + migrations.CreateModel( + name='CoreSyncState', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('resource', models.CharField(max_length=50, unique=True, verbose_name='Resource')), + ('last_updated_at', models.DateTimeField(blank=True, null=True, verbose_name='Last updated at')), + ('last_success_at', models.DateTimeField(blank=True, null=True, verbose_name='Last success at')), + ], + options={ + 'verbose_name': 'Core sync state', + 'verbose_name_plural': 'Core sync states', + }, + ), + migrations.AlterField( + model_name='flexibledate', + name='id', + field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'), + ), + migrations.AlterField( + model_name='gender', + name='id', + field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'), + ), + migrations.AlterField( + model_name='language', + name='id', + field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'), + ), + migrations.AlterField( + model_name='license', + name='id', + field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'), + ), + migrations.AlterField( + model_name='licensestatement', + name='id', + field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'), + ), + ] diff --git a/core/models.py b/core/models.py index bc46070..b62097c 100644 --- a/core/models.py +++ b/core/models.py @@ -1,11 +1,12 @@ import os -from django.db import models, IntegrityError -from django.db.models import Case, When, Value, IntegerField + from django.contrib.auth import get_user_model +from django.db import IntegrityError, models +from django.db.models import Case, IntegerField, Value, When +from django.utils import timezone from django.utils.translation import gettext_lazy as _ from wagtail.admin.panels import FieldPanel from wagtail.fields import RichTextField -from wagtail.search import index from wagtailautocomplete.edit_handlers import AutocompletePanel from . import choices @@ -13,6 +14,7 @@ User = get_user_model() + class CommonControlField(models.Model): """ Class with common control fields. @@ -77,11 +79,9 @@ def autocomplete_label(self): FieldPanel("gender"), ] - class Meta: unique_together = [("code", "gender")] - def __unicode__(self): return self.gender or self.code @@ -224,17 +224,20 @@ def get_object_in_preferred_language(self, language): mission = self.filter(language=language) if mission: return mission - - language_order = ['pt', 'es', 'en'] + + language_order = ["pt", "es", "en"] langs = self.all().values_list("language", flat=True) languages = Language.objects.filter(id__in=langs) - + # Define a ordem baseado na lista language_order - order = [When(code2=lang, then=Value(i)) for i, lang in enumerate(language_order)] + order = [ + When(code2=lang, then=Value(i)) for i, lang in enumerate(language_order) + ] ordered_languages = languages.annotate( - language_order=Case(*order, default=Value(len(language_order)), output_field=IntegerField()) - ).order_by('language_order') - + language_order=Case( + *order, default=Value(len(language_order)), output_field=IntegerField() + ) + ).order_by("language_order") for lang in ordered_languages: mission = self.filter(language=lang) @@ -257,7 +260,7 @@ class RichTextWithLanguage(models.Model): AutocompletePanel("language"), FieldPanel("rich_text"), ] - + objects = LanguageFallbackManager() class Meta: @@ -297,7 +300,7 @@ def autocomplete_label(self): ] class Meta: - unique_together = [("license_type", )] + unique_together = [("license_type",)] verbose_name = _("License") verbose_name_plural = _("Licenses") indexes = [ @@ -326,9 +329,7 @@ def get( ): if not license_type: raise ValueError("License.get requires license_type parameters") - filters = dict( - license_type__iexact=license_type - ) + filters = dict(license_type__iexact=license_type) try: return cls.objects.get(**filters) except cls.MultipleObjectsReturned: @@ -368,7 +369,8 @@ class LicenseStatement(CommonControlField): Language, on_delete=models.SET_NULL, null=True, blank=True ) license = models.ForeignKey( - License, on_delete=models.SET_NULL, null=True, blank=True) + License, on_delete=models.SET_NULL, null=True, blank=True + ) panels = [ FieldPanel("url"), @@ -406,7 +408,8 @@ def get( raise ValueError("LicenseStatement.get requires url or license_p") try: return cls.objects.get( - url__iexact=url, license_p__iexact=license_p, language=language) + url__iexact=url, license_p__iexact=license_p, language=language + ) except cls.MultipleObjectsReturned: return cls.objects.filter( url__iexact=url, license_p__iexact=license_p, language=language @@ -447,9 +450,7 @@ def create_or_update( ): try: data = dict( - url=url, - license_p=license_p, - language=language and language.code2 + url=url, license_p=license_p, language=language and language.code2 ) try: obj = cls.get(url, license_p, language) @@ -464,7 +465,9 @@ def create_or_update( except cls.DoesNotExist: return cls.create(user, url, license_p, language, license) except Exception as e: - raise ValueError(f"Unable to create or update LicenseStatement for {data}: {type(e)} {e}") + raise ValueError( + f"Unable to create or update LicenseStatement for {data}: {type(e)} {e}" + ) @staticmethod def parse_url(url): @@ -513,7 +516,7 @@ class FileWithLang(models.Model): blank=True, on_delete=models.SET_NULL, verbose_name=_("File"), - help_text='', + help_text="", related_name="+", ) @@ -536,3 +539,52 @@ def filename(self): class Meta: abstract = True + + +class CoreSyncState(models.Model): + """ + Guarda o checkpoint da última coleta da API Core por recurso. + + A próxima coleta deve sempre retomar a partir de ``last_updated_at``. + """ + + resource = models.CharField(_("Resource"), max_length=50, unique=True) + last_updated_at = models.DateTimeField(_("Last updated at"), null=True, blank=True) + last_success_at = models.DateTimeField(_("Last success at"), null=True, blank=True) + + class Meta: + verbose_name = _("Core sync state") + verbose_name_plural = _("Core sync states") + + def __unicode__(self): + return self.resource + + def __str__(self): + return self.resource + + @classmethod + def get_for_resource(cls, resource): + obj, _ = cls.objects.get_or_create(resource=resource) + return obj + + def get_from_date_updated(self, default): + """ + Retorna a data inicial para o filtro ``from_date_created`` da API. + + Usa sempre a última data coletada; se ainda não houver checkpoint, + retorna ``default``. + """ + if self.last_updated_at: + return self.last_updated_at.date().isoformat() + return default + + def update_checkpoint(self, max_updated_at=None): + """ + Atualiza o checkpoint após uma execução bem-sucedida de sync. + """ + update_fields = ["last_success_at"] + if max_updated_at: + self.last_updated_at = max_updated_at + update_fields.append("last_updated_at") + self.last_success_at = timezone.now() + self.save(update_fields=update_fields) diff --git a/core/utils/sync_state.py b/core/utils/sync_state.py new file mode 100644 index 0000000..e093337 --- /dev/null +++ b/core/utils/sync_state.py @@ -0,0 +1,49 @@ +from django.utils import timezone +from django.utils.dateparse import parse_datetime + + +def _normalize_datetime(value): + if value is None: + return None + if hasattr(value, "utcoffset"): + dt = value + else: + dt = parse_datetime(str(value)) + if dt is None: + return None + if timezone.is_naive(dt): + dt = timezone.make_aware(dt, timezone.utc) + return dt + + +def track_max_from_item(current_max, item, field="updated"): + """ + Retorna o timestamp mais recente encontrado ao iterar resultados da API. + + Converte ``item[field]`` e ``current_max`` para ``datetime`` antes de + comparar, evitando erro ao misturar string ISO da API com ``DateTimeField``. + + Args: + current_max: ``datetime`` já processado, ou None. + item: Dicionário retornado pela API Core. + field: Nome do campo de data em ``item`` (padrão: ``created``). + + Returns: + O ``datetime`` mais recente entre ``current_max`` e ``item[field]``. + """ + value = _normalize_datetime(item.get(field)) + current_max = _normalize_datetime(current_max) + if value and (current_max is None or value > current_max): + return value + return current_max + + +def finalize_core_sync_state(sync_state, max_updated_at): + """ + Persiste o checkpoint após uma execução bem-sucedida de sync da API Core. + + Args: + sync_state: Instância de ``CoreSyncState`` do recurso sincronizado. + max_updated_at: Maior ``created`` (ou equivalente) visto na execução. + """ + sync_state.update_checkpoint(max_updated_at) diff --git a/markup_doc/autocomplete.py b/markup_doc/autocomplete.py new file mode 100644 index 0000000..8f9fda4 --- /dev/null +++ b/markup_doc/autocomplete.py @@ -0,0 +1,56 @@ +from http import HTTPStatus +from urllib.parse import unquote + +from django.apps import apps +from django.http import HttpResponseBadRequest, JsonResponse +from django.db.models import Q +from django.urls import re_path +from django.views.decorators.http import require_POST +from wagtail.admin.auth import require_admin_access +from wagtailautocomplete.views import create, objects, render_page, search as default_search + + +@require_POST +def search(request): + target_model = request.POST.get("type", "wagtailcore.Page") + is_article_issue_filter = ( + request.POST.get("article_docx_markup_issue_filter") == "1" + ) + if target_model != "markup_doc.Issue" or not is_article_issue_filter: + return default_search(request) + + journal_id = request.POST.get("journal_id") + if not journal_id: + return JsonResponse({"items": []}) + + try: + limit = int(request.POST.get("limit", 100)) + model = apps.get_model(target_model) + except (LookupError, ValueError): + return HttpResponseBadRequest() + + search_query = request.POST.get("query", "") + queryset = model.objects.filter(journal_id=journal_id) + if search_query: + queryset = queryset.filter( + Q(number__icontains=search_query) + | Q(volume__icontains=search_query) + | Q(year__icontains=search_query) + | Q(supplement__icontains=search_query) + | Q(journal__title__icontains=search_query) + ) + + exclude = request.POST.get("exclude", "") + if exclude: + exclusions = [unquote(item) for item in exclude.split(",") if item] + queryset = queryset.exclude(pk__in=exclusions) + + results = map(render_page, queryset.order_by("volume", "number", "year")[:limit]) + return JsonResponse({"items": list(results)}, status=HTTPStatus.OK) + + +urlpatterns = [ + re_path(r"^create/", require_admin_access(create)), + re_path(r"^objects/", require_admin_access(objects)), + re_path(r"^search/", require_admin_access(search)), +] diff --git a/markup_doc/migrations/0003_remove_articledocxmarkup_dateiso_and_more.py b/markup_doc/migrations/0003_remove_articledocxmarkup_dateiso_and_more.py new file mode 100644 index 0000000..068e369 --- /dev/null +++ b/markup_doc/migrations/0003_remove_articledocxmarkup_dateiso_and_more.py @@ -0,0 +1,65 @@ +# Generated by Django 6.0.5 on 2026-05-31 17:36 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('markup_doc', '0002_alter_articledocx_estatus_and_more'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.RemoveField( + model_name='articledocxmarkup', + name='dateiso', + ), + migrations.RemoveField( + model_name='articledocxmarkup', + name='issid_part', + ), + migrations.RemoveField( + model_name='articledocxmarkup', + name='month', + ), + migrations.RemoveField( + model_name='articledocxmarkup', + name='supplno', + ), + migrations.RemoveField( + model_name='articledocxmarkup', + name='supplvol', + ), + migrations.RemoveField( + model_name='articledocxmarkup', + name='vol', + ), + migrations.CreateModel( + name='Issue', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created', models.DateTimeField(auto_now_add=True, verbose_name='Creation date')), + ('updated', models.DateTimeField(auto_now=True, verbose_name='Last update date')), + ('number', models.CharField(blank=True, max_length=20, null=True, verbose_name='Issue number')), + ('volume', models.CharField(blank=True, max_length=20, null=True, verbose_name='Issue volume')), + ('season', models.CharField(blank=True, help_text='Ex: Jan-Abr.', max_length=20, null=True, verbose_name='Issue season')), + ('year', models.CharField(blank=True, max_length=4, null=True, verbose_name='Issue year')), + ('month', models.CharField(blank=True, max_length=20, null=True, verbose_name='Issue month')), + ('supplement', models.CharField(blank=True, max_length=20, null=True, verbose_name='Supplement')), + ('creator', models.ForeignKey(editable=False, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='%(class)s_creator', to=settings.AUTH_USER_MODEL, verbose_name='Creator')), + ('journal', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='markup_doc.journalmodel', verbose_name='Journal')), + ('updated_by', models.ForeignKey(blank=True, editable=False, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='%(class)s_last_mod_user', to=settings.AUTH_USER_MODEL, verbose_name='Updater')), + ], + options={ + 'abstract': False, + }, + ), + migrations.AlterField( + model_name='articledocxmarkup', + name='issue', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='markup_doc.issue', verbose_name='Issue'), + ), + ] diff --git a/markup_doc/models.py b/markup_doc/models.py index b3ef32f..1eace09 100644 --- a/markup_doc/models.py +++ b/markup_doc/models.py @@ -1,5 +1,6 @@ from django import forms from django.db import models +from django.db.models import Q from django.urls import reverse from django.utils.html import format_html from django.utils.translation import gettext_lazy as _ @@ -262,6 +263,74 @@ def __str__(self): return self.title or "" +class Issue(CommonControlField, ClusterableModel): + """ + Class that represent an Issue + """ + + journal = models.ForeignKey( + JournalModel, + verbose_name=_("Journal"), + null=True, + blank=True, + on_delete=models.SET_NULL, + ) + number = models.CharField(_("Issue number"), max_length=20, null=True, blank=True) + volume = models.CharField(_("Issue volume"), max_length=20, null=True, blank=True) + season = models.CharField( + _("Issue season"), + max_length=20, + null=True, + blank=True, + help_text=_("Ex: Jan-Abr."), + ) + year = models.CharField(_("Issue year"), max_length=4, null=True, blank=True) + month = models.CharField(_("Issue month"), max_length=20, null=True, blank=True) + supplement = models.CharField(_("Supplement"), max_length=20, null=True, blank=True) + + panels = [ + AutocompletePanel("journal"), + FieldPanel("number"), + FieldPanel("volume"), + FieldPanel("season"), + FieldPanel("year"), + FieldPanel("month"), + FieldPanel("supplement"), + ] + + autocomplete_search_field = "number" + + @classmethod + def autocomplete_custom_queryset_filter(cls, search_query): + return cls.objects.filter( + Q(number__icontains=search_query) + | Q(volume__icontains=search_query) + | Q(year__icontains=search_query) + | Q(supplement__icontains=search_query) + | Q(journal__title__icontains=search_query) + ) + + def autocomplete_label(self): + return str(self) + + def __str__(self): + return f"{self.generate_issue_folder} - {self.journal}" + + @property + def generate_issue_folder(self): + """ + Gera o identificador do issue no formato vXnYsZ. + + Returns: + str: String no formato vXnYsZ (ex: v10n2s1, v5n3, n2s1) + """ + values = (self.volume, self.number, self.supplement) + labels = ("v", "n", "s") + return "".join( + [f"{label}{value}" for label, value in zip(labels, values) if value] + ) + + def get_default_collection_acron(): try: obj = CollectionModel.objects.select_related("collection").first() @@ -302,15 +371,13 @@ class ArticleDocxMarkup(CommonControlField, ClusterableModel): license = models.URLField( max_length=500, blank=True, null=True, verbose_name=_("License (URL)") ) - vol = models.IntegerField(verbose_name=_("Volume"), null=True, blank=True) - supplvol = models.IntegerField( - verbose_name=_("Suppl Volume"), null=True, blank=True + issue = models.ForeignKey( + Issue, + verbose_name=_("Issue"), + null=True, + blank=True, + on_delete=models.SET_NULL, ) - issue = models.IntegerField(verbose_name=_("Issue"), null=True, blank=True) - supplno = models.IntegerField(verbose_name=_("Suppl Num"), null=True, blank=True) - issid_part = models.TextField(_("Isid Part"), null=True, blank=True) - dateiso = models.TextField(_("Dateiso"), null=True, blank=True) - month = models.TextField(_("Month/Season"), null=True, blank=True) fpage = models.TextField(_("First Page"), null=True, blank=True) seq = models.TextField(_("@Seq"), null=True, blank=True) lpage = models.TextField(_("Last Page"), null=True, blank=True) @@ -371,6 +438,7 @@ class ArticleDocxMarkup(CommonControlField, ClusterableModel): FieldPanel("file"), FieldPanel("collection"), AutocompletePanel("journal"), + AutocompletePanel("issue"), ] def __unicode__(self): @@ -457,13 +525,7 @@ class MarkupXML(ArticleDocxMarkup): FieldPanel("nimtitle"), FieldPanel("pubname"), FieldPanel("license"), - FieldPanel("vol"), - FieldPanel("supplvol"), - FieldPanel("issue"), - FieldPanel("supplno"), - FieldPanel("issid_part"), - FieldPanel("dateiso"), - FieldPanel("month"), + AutocompletePanel("issue"), FieldPanel("fpage"), FieldPanel("seq"), FieldPanel("lpage"), diff --git a/markup_doc/static/js/issue-autocomplete-filter.js b/markup_doc/static/js/issue-autocomplete-filter.js new file mode 100644 index 0000000..07657f2 --- /dev/null +++ b/markup_doc/static/js/issue-autocomplete-filter.js @@ -0,0 +1,110 @@ +(function () { + function parseAutocompleteValue(value) { + if (!value || value === "null") { + return null; + } + + try { + return JSON.parse(value); + } catch (error) { + return null; + } + } + + function getSelectedJournalId() { + var journalInput = document.querySelector('[name="journal"]'); + var value = parseAutocompleteValue(journalInput && journalInput.value); + return value && value.pk ? value.pk : null; + } + + function getIssueInput() { + return document.querySelector('[name="issue"]'); + } + + function isArticleDocxMarkupForm() { + return Boolean(document.querySelector('[name="journal"]') && getIssueInput()); + } + + function getBodyValue(body, key) { + if (body instanceof FormData || body instanceof URLSearchParams) { + return body.get(key); + } + + return null; + } + + function setBodyValue(body, key, value) { + if (body instanceof FormData || body instanceof URLSearchParams) { + body.set(key, value); + } + } + + function isIssueAutocompleteRequest(body) { + return getBodyValue(body, "type") === "markup_doc.Issue"; + } + + function addJournalIdToIssueRequest(body) { + if (!isArticleDocxMarkupForm() || !isIssueAutocompleteRequest(body)) { + return; + } + + setBodyValue(body, "article_docx_markup_issue_filter", "1"); + + var journalId = getSelectedJournalId(); + if (journalId) { + setBodyValue(body, "journal_id", journalId); + } + } + + function clearIssueSelection() { + var issueInput = getIssueInput(); + if (!issueInput || !issueInput.value || issueInput.value === "null") { + return; + } + + issueInput.value = ""; + issueInput.dispatchEvent(new Event("change", { bubbles: true })); + } + + function watchJournalChanges() { + if (!isArticleDocxMarkupForm()) { + return; + } + + var lastJournalId = getSelectedJournalId(); + + window.setInterval(function () { + var currentJournalId = getSelectedJournalId(); + if (currentJournalId !== lastJournalId) { + lastJournalId = currentJournalId; + clearIssueSelection(); + } + }, 500); + } + + var originalSend = XMLHttpRequest.prototype.send; + + XMLHttpRequest.prototype.send = function (body) { + addJournalIdToIssueRequest(body); + + return originalSend.call(this, body); + }; + + if (window.fetch) { + var originalFetch = window.fetch; + + window.fetch = function (resource, options) { + if (options && options.body) { + addJournalIdToIssueRequest(options.body); + } + + return originalFetch.call(this, resource, options); + }; + } + + if (document.readyState === "loading") { + document.addEventListener("DOMContentLoaded", watchJournalChanges); + } else { + watchJournalChanges(); + } +})(); diff --git a/markup_doc/sync_api.py b/markup_doc/sync_api.py index fd34af3..d1bb46f 100644 --- a/markup_doc/sync_api.py +++ b/markup_doc/sync_api.py @@ -1,13 +1,29 @@ import logging +from urllib.parse import urlencode from django.conf import settings -from django.db import transaction +from django.db.models import Q +from core.models import CoreSyncState from core.utils.requester import fetch_data as fetch -from markup_doc.models import CollectionModel, CollectionValuesModel, JournalModel +from core.utils.sync_state import finalize_core_sync_state, track_max_from_item +from markup_doc.models import CollectionModel, CollectionValuesModel, Issue, JournalModel logger = logging.getLogger(__name__) +ISSUE_SYNC_RESOURCE = "issue" + + +def _iter_api_pages(url, resource_name): + while url: + logger.info(f"Syncing {resource_name} page: {url}") + + data = fetch( + url, headers={"Accept": "application/json"}, json=True, timeout=(10, 60) + ) + yield data.get("results", []) + url = data.get("next") + def sync_collection_from_api(): url = settings.CORE_COLLECTION_API_URL @@ -34,72 +50,153 @@ def sync_collection_from_api(): ) -def sync_journals_from_api(): - journals = JournalModel.objects.all() - if journals.exists(): - journals.delete() - - obj = CollectionModel.objects.select_related("collection").first() - - acron_selected = obj.collection.acron if obj and obj.collection else None - if not acron_selected: - logger.warning("No collection selected; skipping journal sync") - return +def _build_journal_from_api_item(item): + title = item.get("title", None) + short_title = item.get("short_title", None) + acronym = item.get("acronym", None) + pissn = item.get("official", {}).get("issn_print", None) if item.get("official", {}) else None + eissn = item.get("official", {}).get("issn_electronic", None) if item.get("official", {}) else None + pubname = item.get("publisher", []) + title_in_database = item.get("title_in_database", []) + title_nlm = None + + if title_in_database: + for t in title_in_database: + if t.get("name", None) == "MEDLINE": + title_nlm = t.get("title", None) + + if pubname: + pubname = pubname[0].get("name", None) + else: + pubname = None + + scielo_journals = item.get("scielo_journal", []) + issn_scielo = None + if scielo_journals: + issn_scielo = scielo_journals[0].get("issn_scielo", None) + + return JournalModel( + title=title, + short_title=short_title, + acronym=acronym, + pissn=pissn, + eissn=eissn, + pubname=pubname, + title_nlm=title_nlm, + issn=issn_scielo, + ) + + +def build_api_url_core(domain, endpoint, params): + url = f"{domain}{endpoint}" + query = urlencode(params) + return f"{url}?{query}" - new_journals = [] - url = settings.CORE_JOURNAL_API_URL - while url: - logger.info("Syncing journals page: %s", url) - data = fetch( - url, headers={"Accept": "application/json"}, json=True, timeout=(10, 60) +def sync_journals_from_api(): + sync_state = CoreSyncState.get_for_resource(resource="journal") + from_date_updated = sync_state.get_from_date_updated( + settings.CORE_ISSUE_FROM_DATE_CREATED + ) + url = build_api_url_core( + domain=settings.CORE_API_DOMAIN, + endpoint=settings.CORE_JOURNAL_API_ENDPOINT, + params={ + "from_date_updated": from_date_updated + } + ) + synced_count = 0 + skipped_count = 0 + max_created = sync_state.last_updated_at + + for items in _iter_api_pages(url, "journals"): + for item in items: + journal = _build_journal_from_api_item(item) + obj, _ = JournalModel.objects.update_or_create( + title=journal.title, + defaults={ + "short_title": journal.short_title, + "title_nlm": journal.title_nlm, + "acronym": journal.acronym, + "issn": journal.issn, + "pissn": journal.pissn, + "eissn": journal.eissn, + "pubname": journal.pubname, + }, + ) + logger.info(f"Journal {obj} completed") + max_created = track_max_from_item(max_created, item) + finalize_core_sync_state(sync_state, max_created) + logger.info( + f"Journal sync finished. Synced={synced_count} skipped={skipped_count}" + ) + + +def _get_journal_from_issue_data(issue_data): + journal_data = issue_data.get("journal") or {} + issn_values = [ + journal_data.get("issn_print"), + journal_data.get("issn_electronic"), + journal_data.get("scielo_journal"), + ] + issn_values = [v for v in issn_values if v] + + if not issn_values: + return None + + return ( + JournalModel.objects.filter( + Q(pissn__in=issn_values) + | Q(eissn__in=issn_values) + | Q(issn__in=issn_values) ) - - for item in data["results"]: - title = item.get("title", None) - short_title = item.get("short_title", None) - acronym = item.get("acronym", None) - pissn = item.get("official", {}).get("issn_print", None) - eissn = item.get("official", {}).get("issn_electronic", None) - acronym = item.get("acronym", None) - pubname = item.get("publisher", []) - title_in_database = item.get("title_in_database", []) - title_nlm = None - - if title_in_database: - for t in title_in_database: - if t.get("name", None) == "MEDLINE": - title_nlm = t.get("title", None) - - if pubname: - pubname = pubname[0].get("name", None) - - scielo_journals = item.get("scielo_journal", []) - - # Obtener la primera colección asociada, si existe - collection_acron = None - issn_scielo = None - if scielo_journals: - collection_acron = scielo_journals[0].get("collection_acron") - issn_scielo = scielo_journals[0].get("issn_scielo", None) - - if not title or acron_selected != collection_acron: - continue # Saltar si falta el título - - journal = JournalModel( - title=title, - short_title=short_title or None, - title_nlm=title_nlm or None, - acronym=acronym or None, - issn=issn_scielo or None, - pissn=pissn or None, - eissn=eissn or None, - pubname=pubname or None, + .order_by("id") + .first() + ) + +def build_issue_from_data(item): + issue_data = { + "number": item.get("number") or None, + "volume": item.get("volume") or None, + "season": item.get("season") or None, + "year": item.get("year") or None, + "month": item.get("month") or None, + "supplement": item.get("supplement") or None, + } + return issue_data + + +def sync_issues_from_api(): + sync_state = CoreSyncState.get_for_resource(resource="issue") + from_date_updated = sync_state.get_from_date_updated( + settings.CORE_ISSUE_FROM_DATE_CREATED + ) + url = build_api_url_core( + domain=settings.CORE_API_DOMAIN, + endpoint=settings.CORE_ISSUE_API_ENDPOINT, + params={ + "from_date_updated": from_date_updated + } + ) + synced_count = 0 + skipped_count = 0 + max_created = sync_state.last_updated_at + + for items in _iter_api_pages(url, "issues"): + for item in items: + journal = _get_journal_from_issue_data(item) + if not journal: + skipped_count += 1 + continue + issue_data = build_issue_from_data(item) + issue_data.update({"journal": journal}) + Issue.objects.get_or_create( + **issue_data, ) - new_journals.append(journal) - - url = data.get("next") + synced_count += 1 + max_created = track_max_from_item(max_created, item) + finalize_core_sync_state(sync_state, max_created) - if new_journals: - with transaction.atomic(): - JournalModel.objects.bulk_create(new_journals, ignore_conflicts=True) + logger.info( + f"Issue sync finished. from_date_created={from_date_updated} synced={synced_count} skipped={skipped_count}" + ) diff --git a/markup_doc/tasks.py b/markup_doc/tasks.py index a2e5717..c480f22 100644 --- a/markup_doc/tasks.py +++ b/markup_doc/tasks.py @@ -6,10 +6,13 @@ # Third-party imports import langid +from config import celery_app from django.core.files.base import ContentFile from django.utils.text import slugify +from markuplib.function_docx import functionsDocx +from model_ai.llama import LlamaInputSettings, LlamaService +from reference.config_gemini import create_prompt_reference -from config import celery_app from markup_doc.labeling_utils import ( MODEL_NAME_GEMINI, MODEL_NAME_LLAMA, @@ -24,11 +27,8 @@ split_in_three, ) from markup_doc.models import MarkupXML, ProcessStatus, UploadDocx -from markup_doc.sync_api import sync_journals_from_api +from markup_doc.sync_api import sync_issues_from_api, sync_journals_from_api from markup_doc.xml import get_xml -from markuplib.function_docx import functionsDocx -from model_ai.llama import LlamaInputSettings, LlamaService -from reference.config_gemini import create_prompt_reference logger = logging.getLogger(__name__) @@ -71,10 +71,15 @@ def clean_labels(text): @celery_app.task() -def task_sync_journals_from_api(): +def task_sync_journals_from_api(user_id=None, collection_acron=None): sync_journals_from_api() +@celery_app.task() +def task_sync_issues_from_api(user_id=None): + sync_issues_from_api() + + @celery_app.task() def get_labels(article_id, user_id): llm_model = get_llm_model_name() @@ -89,8 +94,6 @@ def get_labels(article_id, user_id): doc = functionsDocx.openDocx(article_docx.file.path) sections, content = functionsDocx().extractContent(doc, article_docx.file.path) article_docx_markup = article_docx - text_title = "" - text_paragraph = "" stream_data = [] stream_data_body = [] stream_data_back = [] diff --git a/markup_doc/wagtail_hooks.py b/markup_doc/wagtail_hooks.py index 4eb729b..170cc9e 100644 --- a/markup_doc/wagtail_hooks.py +++ b/markup_doc/wagtail_hooks.py @@ -1,3 +1,4 @@ +from config.menu import get_menu_order from django.db import transaction from django.http import HttpResponseRedirect from django.template.response import TemplateResponse @@ -5,6 +6,7 @@ from django.urls import path from django.utils.html import format_html from django.utils.translation import gettext_lazy as _ +from reference.wagtail_hooks import ReferenceModelViewSet from wagtail import hooks from wagtail.admin import messages from wagtail.snippets.models import register_snippet @@ -14,12 +16,15 @@ SnippetViewSet, SnippetViewSetGroup, ) -from wagtail_modeladmin.options import ModelAdmin +from xml_manager.wagtail_hooks import ( + XMLDocumentHTMLSnippetViewSet, + XMLDocumentPDFSnippetViewSet, +) -from config.menu import get_menu_order from markup_doc import views from markup_doc.models import ( CollectionModel, + Issue, JournalModel, MarkupXML, ProcessStatus, @@ -27,11 +32,6 @@ ) from markup_doc.sync_api import sync_collection_from_api from markup_doc.tasks import get_labels, task_sync_journals_from_api, update_xml -from reference.wagtail_hooks import ReferenceModelViewSet -from xml_manager.wagtail_hooks import ( - XMLDocumentHTMLSnippetViewSet, - XMLDocumentPDFSnippetViewSet, -) @hooks.register("register_admin_urls") @@ -51,8 +51,9 @@ def register_admin_urls(): @hooks.register("insert_editor_js") def xref_js(): return format_html( - '', + '', static("js/xref-button.js"), + static("js/issue-autocomplete-filter.js"), ) @@ -184,6 +185,18 @@ def index_view(self, request): return response +class IssueViewSet(SnippetViewSet): + model = Issue + menu_label = _("Fascículos") + menu_icon = "date" + add_to_admin_menu = False + exclude_from_explorer = False + list_per_page = 20 + list_display = ("journal", "volume", "number", "year") + search_fields = ("journal__title", "volume", "number", "year") + list_filter = ("journal", "year") + + class XMLSPSSnippetViewSetGroup(SnippetViewSetGroup): menu_name = "xml_sps" menu_label = _("XML SPS") @@ -215,6 +228,7 @@ class MarkupSnippetViewSetGroup(SnippetViewSetGroup): items = ( UploadDocxViewSet, XMLSPSSnippetViewSetGroup, + IssueViewSet, ) diff --git a/markup_doc/xml.py b/markup_doc/xml.py index 3698ca9..d045bc1 100644 --- a/markup_doc/xml.py +++ b/markup_doc/xml.py @@ -306,6 +306,29 @@ def get_xml(article_docx, data_front, data, data_back): node_tmp2 = etree.SubElement(node_tmp, "year") node_tmp2.text = article_docx.artdate.strftime("%Y") + + issue = article_docx.issue + + if issue and (issue.year or issue.month): + node_tmp = etree.SubElement(node, 'pub-date') + node_tmp.set('date-type', 'collection') + node_tmp.set('publication-format', 'electronic') + + if issue.month: + node_tmp2 = etree.SubElement(node_tmp, 'month') + node_tmp2.text = issue.month + + if issue.year: + node_tmp2 = etree.SubElement(node_tmp, 'year') + node_tmp2.text = issue.year + + if issue and issue.volume: + node_tmp = etree.SubElement(node, 'volume') + node_tmp.text = str(issue.volume) + + if issue and issue.number: + node_tmp = etree.SubElement(node, 'issue') + node_tmp.text = str(issue.number) if article_docx.dateiso: node_tmp = etree.SubElement(node, "pub-date") @@ -329,14 +352,6 @@ def get_xml(article_docx, data_front, data, data_back): node_tmp2 = etree.SubElement(node_tmp, "year") node_tmp2.text = article_docx.dateiso.split("-")[0] - if article_docx.vol: - node_tmp = etree.SubElement(node, "volume") - node_tmp.text = str(article_docx.vol) - - if article_docx.issue: - node_tmp = etree.SubElement(node, "issue") - node_tmp.text = str(article_docx.issue) - if article_docx.elocatid: node_tmp = etree.SubElement(node, "elocation-id") node_tmp.text = article_docx.elocatid