Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 75 additions & 44 deletions markup_doc/sync_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,17 +93,27 @@ def build_api_url_core(domain, endpoint, params):
return f"{url}?{query}"


def sync_journals_from_api():
def sync_journals_from_api(
collection_acron=None,
issn_scielo=None,
from_date_updated=None,
):
sync_state = CoreSyncState.get_for_resource(resource="journal")
from_date_updated = sync_state.get_from_date_updated(
settings.CORE_ISSUE_FROM_DATE_CREATED
)
if from_date_updated is None:
from_date_updated = sync_state.get_from_date_updated(
settings.CORE_ISSUE_FROM_DATE_CREATED
)

params = {"from_date_updated": from_date_updated}
if collection_acron:
params["collection"] = collection_acron
if issn_scielo:
params["issn_scielo"] = issn_scielo

url = build_api_url_core(
domain=settings.CORE_API_DOMAIN,
endpoint=settings.CORE_JOURNAL_API_ENDPOINT,
params={
"from_date_updated": from_date_updated
}
params=params,
)
synced_count = 0
skipped_count = 0
Expand All @@ -113,20 +123,22 @@ def sync_journals_from_api():
for item in items:
journal = _build_journal_from_api_item(item)
obj, _ = JournalModel.objects.update_or_create(
title=journal.title,
defaults={
"short_title": journal.short_title,
"title_nlm": journal.title_nlm,
"acronym": journal.acronym,
"issn": journal.issn,
"pissn": journal.pissn,
"eissn": journal.eissn,
"pubname": journal.pubname,
},
title=journal.title,
defaults={
"short_title": journal.short_title,
"title_nlm": journal.title_nlm,
"acronym": journal.acronym,
"issn": journal.issn,
"pissn": journal.pissn,
"eissn": journal.eissn,
"pubname": journal.pubname,
},
)
logger.info(f"Journal {obj} completed")
synced_count += 1
max_created = track_max_from_item(max_created, item)
finalize_core_sync_state(sync_state, max_created)

finalize_core_sync_state(sync_state, max_created)
logger.info(
f"Journal sync finished. Synced={synced_count} skipped={skipped_count}"
)
Expand Down Expand Up @@ -166,37 +178,56 @@ def build_issue_from_data(item):
return issue_data


def sync_issues_from_api():
def _get_registered_issn_scielo_values(issn_scielo=None):
queryset = JournalModel.objects.exclude(issn__isnull=True).exclude(issn="")
if issn_scielo:
queryset = queryset.filter(issn=issn_scielo)
return queryset.values_list("issn", flat=True).distinct()


def sync_issues_from_api(issn_scielo=None, from_date_updated=None):
sync_state = CoreSyncState.get_for_resource(resource="issue")
from_date_updated = sync_state.get_from_date_updated(
settings.CORE_ISSUE_FROM_DATE_CREATED
)
url = build_api_url_core(
domain=settings.CORE_API_DOMAIN,
endpoint=settings.CORE_ISSUE_API_ENDPOINT,
params={
"from_date_updated": from_date_updated
}
)
if from_date_updated is None:
from_date_updated = sync_state.get_from_date_updated(
settings.CORE_ISSUE_FROM_DATE_CREATED
)

registered_issns = _get_registered_issn_scielo_values(issn_scielo=issn_scielo)
if not registered_issns:
logger.warning(
"Issue sync skipped: no registered journals found"
+ (f" for issn_scielo={issn_scielo}" if issn_scielo else "")
)
return

synced_count = 0
skipped_count = 0
max_created = sync_state.last_updated_at

for items in _iter_api_pages(url, "issues"):
for item in items:
journal = _get_journal_from_issue_data(item)
if not journal:
skipped_count += 1
continue
issue_data = build_issue_from_data(item)
issue_data.update({"journal": journal})
Issue.objects.get_or_create(
**issue_data,
)
synced_count += 1
max_created = track_max_from_item(max_created, item)
finalize_core_sync_state(sync_state, max_created)
for journal_issn in registered_issns:
url = build_api_url_core(
domain=settings.CORE_API_DOMAIN,
endpoint=settings.CORE_ISSUE_API_ENDPOINT,
params={
"from_date_updated": from_date_updated,
"issn_scielo": journal_issn,
},
)

for items in _iter_api_pages(url, f"issues ({journal_issn})"):
for item in items:
journal = _get_journal_from_issue_data(item)
if not journal:
skipped_count += 1
continue
issue_data = build_issue_from_data(item)
issue_data.update({"journal": journal})
Issue.objects.get_or_create(**issue_data)
synced_count += 1
max_created = track_max_from_item(max_created, item)

finalize_core_sync_state(sync_state, max_created)
logger.info(
f"Issue sync finished. from_date_created={from_date_updated} synced={synced_count} skipped={skipped_count}"
f"Issue sync finished. from_date_updated={from_date_updated} "
f"synced={synced_count} skipped={skipped_count}"
)
24 changes: 20 additions & 4 deletions markup_doc/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,29 @@ def clean_labels(text):


@celery_app.task()
def task_sync_journals_from_api(user_id=None, collection_acron=None):
sync_journals_from_api()
def task_sync_journals_from_api(
user_id=None,
collection_acron=None,
issn_scielo=None,
from_date_updated=None,
):
sync_journals_from_api(
collection_acron=collection_acron,
issn_scielo=issn_scielo,
from_date_updated=from_date_updated,
)


@celery_app.task()
def task_sync_issues_from_api(user_id=None):
sync_issues_from_api()
def task_sync_issues_from_api(
user_id=None,
issn_scielo=None,
from_date_updated=None,
):
sync_issues_from_api(
issn_scielo=issn_scielo,
from_date_updated=from_date_updated,
)


@celery_app.task()
Expand Down
Loading