diff --git a/markup_doc/sync_api.py b/markup_doc/sync_api.py index d1bb46f..c31738d 100644 --- a/markup_doc/sync_api.py +++ b/markup_doc/sync_api.py @@ -93,17 +93,27 @@ def build_api_url_core(domain, endpoint, params): return f"{url}?{query}" -def sync_journals_from_api(): +def sync_journals_from_api( + collection_acron=None, + issn_scielo=None, + from_date_updated=None, +): sync_state = CoreSyncState.get_for_resource(resource="journal") - from_date_updated = sync_state.get_from_date_updated( - settings.CORE_ISSUE_FROM_DATE_CREATED - ) + if from_date_updated is None: + from_date_updated = sync_state.get_from_date_updated( + settings.CORE_ISSUE_FROM_DATE_CREATED + ) + + params = {"from_date_updated": from_date_updated} + if collection_acron: + params["collection"] = collection_acron + if issn_scielo: + params["issn_scielo"] = issn_scielo + url = build_api_url_core( domain=settings.CORE_API_DOMAIN, endpoint=settings.CORE_JOURNAL_API_ENDPOINT, - params={ - "from_date_updated": from_date_updated - } + params=params, ) synced_count = 0 skipped_count = 0 @@ -113,20 +123,22 @@ def sync_journals_from_api(): for item in items: journal = _build_journal_from_api_item(item) obj, _ = JournalModel.objects.update_or_create( - title=journal.title, - defaults={ - "short_title": journal.short_title, - "title_nlm": journal.title_nlm, - "acronym": journal.acronym, - "issn": journal.issn, - "pissn": journal.pissn, - "eissn": journal.eissn, - "pubname": journal.pubname, - }, + title=journal.title, + defaults={ + "short_title": journal.short_title, + "title_nlm": journal.title_nlm, + "acronym": journal.acronym, + "issn": journal.issn, + "pissn": journal.pissn, + "eissn": journal.eissn, + "pubname": journal.pubname, + }, ) logger.info(f"Journal {obj} completed") + synced_count += 1 max_created = track_max_from_item(max_created, item) - finalize_core_sync_state(sync_state, max_created) + + finalize_core_sync_state(sync_state, max_created) logger.info( f"Journal sync finished. Synced={synced_count} skipped={skipped_count}" ) @@ -166,37 +178,56 @@ def build_issue_from_data(item): return issue_data -def sync_issues_from_api(): +def _get_registered_issn_scielo_values(issn_scielo=None): + queryset = JournalModel.objects.exclude(issn__isnull=True).exclude(issn="") + if issn_scielo: + queryset = queryset.filter(issn=issn_scielo) + return queryset.values_list("issn", flat=True).distinct() + + +def sync_issues_from_api(issn_scielo=None, from_date_updated=None): sync_state = CoreSyncState.get_for_resource(resource="issue") - from_date_updated = sync_state.get_from_date_updated( - settings.CORE_ISSUE_FROM_DATE_CREATED - ) - url = build_api_url_core( - domain=settings.CORE_API_DOMAIN, - endpoint=settings.CORE_ISSUE_API_ENDPOINT, - params={ - "from_date_updated": from_date_updated - } - ) + if from_date_updated is None: + from_date_updated = sync_state.get_from_date_updated( + settings.CORE_ISSUE_FROM_DATE_CREATED + ) + + registered_issns = _get_registered_issn_scielo_values(issn_scielo=issn_scielo) + if not registered_issns: + logger.warning( + "Issue sync skipped: no registered journals found" + + (f" for issn_scielo={issn_scielo}" if issn_scielo else "") + ) + return + synced_count = 0 skipped_count = 0 max_created = sync_state.last_updated_at - for items in _iter_api_pages(url, "issues"): - for item in items: - journal = _get_journal_from_issue_data(item) - if not journal: - skipped_count += 1 - continue - issue_data = build_issue_from_data(item) - issue_data.update({"journal": journal}) - Issue.objects.get_or_create( - **issue_data, - ) - synced_count += 1 - max_created = track_max_from_item(max_created, item) - finalize_core_sync_state(sync_state, max_created) + for journal_issn in registered_issns: + url = build_api_url_core( + domain=settings.CORE_API_DOMAIN, + endpoint=settings.CORE_ISSUE_API_ENDPOINT, + params={ + "from_date_updated": from_date_updated, + "issn_scielo": journal_issn, + }, + ) + for items in _iter_api_pages(url, f"issues ({journal_issn})"): + for item in items: + journal = _get_journal_from_issue_data(item) + if not journal: + skipped_count += 1 + continue + issue_data = build_issue_from_data(item) + issue_data.update({"journal": journal}) + Issue.objects.get_or_create(**issue_data) + synced_count += 1 + max_created = track_max_from_item(max_created, item) + + finalize_core_sync_state(sync_state, max_created) logger.info( - f"Issue sync finished. from_date_created={from_date_updated} synced={synced_count} skipped={skipped_count}" + f"Issue sync finished. from_date_updated={from_date_updated} " + f"synced={synced_count} skipped={skipped_count}" ) diff --git a/markup_doc/tasks.py b/markup_doc/tasks.py index c480f22..dd7b6b0 100644 --- a/markup_doc/tasks.py +++ b/markup_doc/tasks.py @@ -71,13 +71,29 @@ def clean_labels(text): @celery_app.task() -def task_sync_journals_from_api(user_id=None, collection_acron=None): - sync_journals_from_api() +def task_sync_journals_from_api( + user_id=None, + collection_acron=None, + issn_scielo=None, + from_date_updated=None, +): + sync_journals_from_api( + collection_acron=collection_acron, + issn_scielo=issn_scielo, + from_date_updated=from_date_updated, + ) @celery_app.task() -def task_sync_issues_from_api(user_id=None): - sync_issues_from_api() +def task_sync_issues_from_api( + user_id=None, + issn_scielo=None, + from_date_updated=None, +): + sync_issues_from_api( + issn_scielo=issn_scielo, + from_date_updated=from_date_updated, + ) @celery_app.task()