From 6228400f6e984bf1909d0e6827634654a5ccd19c Mon Sep 17 00:00:00 2001 From: Karolina Przerwa Date: Thu, 11 Jun 2026 11:46:59 +0300 Subject: [PATCH] change(matcher): add more streams --- cds_migrator_kit/errors.py | 6 + cds_migrator_kit/rdm/migration_config.py | 2 +- .../rdm/records/transform/models/research.py | 5 +- .../rdm/records/transform/transform.py | 4 + .../transform/xml_processing/rules/base.py | 14 +- .../xml_processing/rules/publications.py | 3 + cds_migrator_kit/rdm/streams.yaml | 376 +++++++++++++++++- cds_migrator_kit/transform/dumper.py | 9 +- cds_migrator_kit/users/api.py | 4 +- setup.cfg | 3 +- 10 files changed, 397 insertions(+), 29 deletions(-) diff --git a/cds_migrator_kit/errors.py b/cds_migrator_kit/errors.py index ba6d2d4c..00453339 100644 --- a/cds_migrator_kit/errors.py +++ b/cds_migrator_kit/errors.py @@ -45,6 +45,12 @@ class RecordModelMissing(CDSMigrationException): description = "[Record did not match any available model]" +class MultipleModelsMatched(CDSMigrationException): + """Multiple models matched exception.""" + + description = "[Record matched multiple models]" + + class UnexpectedValue(CDSMigrationException): """The corresponding value is unexpected.""" diff --git a/cds_migrator_kit/rdm/migration_config.py b/cds_migrator_kit/rdm/migration_config.py index 690f6fbf..ed243d76 100644 --- a/cds_migrator_kit/rdm/migration_config.py +++ b/cds_migrator_kit/rdm/migration_config.py @@ -377,7 +377,7 @@ def _(x): # needed to avoid start time failure with lazy strings }, "inis": { "label": _("INIS"), - "validator": schemes.is_inspire, + "validator": schemes.is_inis, "datacite": "INIS", }, "indico": { diff --git a/cds_migrator_kit/rdm/records/transform/models/research.py b/cds_migrator_kit/rdm/records/transform/models/research.py index 92bb71b2..065368aa 100644 --- a/cds_migrator_kit/rdm/records/transform/models/research.py +++ b/cds_migrator_kit/rdm/records/transform/models/research.py @@ -41,6 +41,7 @@ class ResearchModel(CdsOverdo): 
"300__a", # number of pages "340__a", # TODO ignore material? "540__3", # TODO still ignore the material of the license? + "540__9", # TODO still ignore the material of the license? "542__3", # TODO still ignore the material of the license? "595__i", # TODO ?? "695__e", # some inspire tag @@ -50,10 +51,12 @@ class ResearchModel(CdsOverdo): "773__x", # INSPIRE publication note "773__t", # INSPIRE publication note "773__0", # from SIS: can be ignored + "773__o", # from SIS: can be ignored + "773__z", # from SIS: can be ignored "8564_8", # file id "8564_s", # bibdoc id "8564_x", # icon thumbnails sizes - # "8564_y", # file description - done by files dump + "8564_y", # file description - done by files dump, sometimes these are used for open access calculation "8564_w", # system field "913__y", # citation "913__v", # citation diff --git a/cds_migrator_kit/rdm/records/transform/transform.py b/cds_migrator_kit/rdm/records/transform/transform.py index c6bf6874..c2764f92 100644 --- a/cds_migrator_kit/rdm/records/transform/transform.py +++ b/cds_migrator_kit/rdm/records/transform/transform.py @@ -35,7 +35,9 @@ MissingRequiredField, + MultipleModelsMatched, RecordFlaggedCuration, + RecordModelMissing, RestrictedFileDetected, UnexpectedValue, ) from cds_migrator_kit.rdm.migration_config import ( RDM_RECORDS_IDENTIFIERS_SCHEMES, @@ -847,6 +849,8 @@ def _transform(self, entry): UnexpectedValue, ManualImportRequired, MissingRequiredField, + MultipleModelsMatched, + RecordModelMissing, ) as e: migration_logger.add_log(e, record=entry) diff --git a/cds_migrator_kit/rdm/records/transform/xml_processing/rules/base.py b/cds_migrator_kit/rdm/records/transform/xml_processing/rules/base.py index a7c3b2d6..188d612c 100644 --- a/cds_migrator_kit/rdm/records/transform/xml_processing/rules/base.py +++ b/cds_migrator_kit/rdm/records/transform/xml_processing/rules/base.py @@ -155,10 +155,6 @@ def validate_subject_scheme(subject_scheme, subfield, key): is_controlled_subject = key == "65017" and 
(scheme in
CONTROLLED_SUBJECTS_SCHEMES) - # Drop other subjects - if val_a.lower().strip() == "other subjects": - raise IgnoreKey("subjects") - if type(val_a) is tuple: # sometimes keywords are stick in one tag, so they come out as tuple s_values = val_a @@ -176,7 +172,7 @@ def validate_subject_scheme(subject_scheme, subfield, key): raise IgnoreKey("subjects") else: subject_value = val_a.strip() - if subject_value.lower() == "xx": + if subject_value.lower() in ["xx", "other subjects"]: raise IgnoreKey("subjects") _subjects = self.get("subjects", []) # invalid schema = euproject info scheme = scheme @@ -314,7 +310,7 @@ def report_number(self, key, value): if not identifier: if re.findall(udc_pattern, scheme): raise IgnoreKey("identifiers") - elif scheme.startswith("CM-"): + elif scheme.upper().startswith("CM-"): # barcode, to drop raise IgnoreKey("identifiers") elif scheme.upper().startswith("P00"): @@ -322,6 +318,8 @@ def report_number(self, key, value): raise IgnoreKey("identifiers") elif scheme.upper() == "CERN LIBRARY": raise IgnoreKey("identifiers") + elif scheme.upper().startswith("B00"): + raise IgnoreKey("identifiers") elif scheme.startswith("SCOO"): identifier = scheme scheme = "other" @@ -818,6 +816,10 @@ def related_identifiers_787(self, key, value): "relation_type": {"id": "references"}, "resource_type": {"id": "publication-conferencepaper"}, }, + "article": { + "relation_type": {"id": "references"}, + "resource_type": {"id": "publication-article"}, + }, } if recid: diff --git a/cds_migrator_kit/rdm/records/transform/xml_processing/rules/publications.py b/cds_migrator_kit/rdm/records/transform/xml_processing/rules/publications.py index cdabd686..3803d0e2 100644 --- a/cds_migrator_kit/rdm/records/transform/xml_processing/rules/publications.py +++ b/cds_migrator_kit/rdm/records/transform/xml_processing/rules/publications.py @@ -431,6 +431,7 @@ def resource_type(self, key, value): "opal_papers", "aleph_papers", "ps212_papers", + "slintnote", ] committees = { @@ -477,6
+478,7 @@ def resource_type(self, key, value): for i, v in enumerate( [ "conferencepaper", + "proceedings", "bookchapter", "itcerntalk", "antarescerntalk", @@ -513,6 +515,7 @@ def resource_type(self, key, value): mapping = { "preprint": {"id": "publication-preprint"}, "conferencepaper": {"id": "publication-conferencepaper"}, + "proceedings": {"id": "publication-conferenceproceeding"}, "article": {"id": "publication-article"}, "note": {"id": "publication-technicalnote"}, "lcd-notes": {"id": "publication-technicalnote"}, diff --git a/cds_migrator_kit/rdm/streams.yaml b/cds_migrator_kit/rdm/streams.yaml index f649456b..0a45f796 100644 --- a/cds_migrator_kit/rdm/streams.yaml +++ b/cds_migrator_kit/rdm/streams.yaml @@ -84,40 +84,382 @@ records: - "b6553d89-ea62-4a7c-9f5b-e76b5bfdb733" load: legacy_pids_to_redirect: cds_migrator_kit/rdm/data/lep_exp/delphi_priv/duplicated_pids.json - committees: - data_dir: cds_migrator_kit/rdm/data/committees - tmp_dir: cds_migrator_kit/rdm/tmp/committees - log_dir: cds_migrator_kit/rdm/log/committees + drdc_comm: + data_dir: cds_migrator_kit/rdm/data/committees/drdc + tmp_dir: cds_migrator_kit/rdm/tmp/committees/drdc + log_dir: cds_migrator_kit/rdm/log/committees/drdc extract: - dirpath: cds_migrator_kit/rdm/data/committees/dump/ + dirpath: cds_migrator_kit/rdm/data/committees/drdc transform: files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ missing_users: cds_migrator_kit/rdm/data/users communities_ids: - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" - former_exp: - data_dir: cds_migrator_kit/rdm/data/former_experiments/public - tmp_dir: cds_migrator_kit/rdm/tmp/former_experiments/public - log_dir: cds_migrator_kit/rdm/log/former_experiments/public + - "9eff4bab-9e5e-4cdb-9d16-be6588f7c6c1" + eec_comm: + data_dir: cds_migrator_kit/rdm/data/committees/eec + tmp_dir: cds_migrator_kit/rdm/tmp/committees/eec + log_dir: cds_migrator_kit/rdm/log/committees/eec extract: - dirpath: cds_migrator_kit/rdm/data/former_experiments/public/dump/
+ dirpath: cds_migrator_kit/rdm/data/committees/eec/ transform: - files_dump_dir: cds_migrator_kit/rdm/data/former_experiments/public/files/ + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "71a01272-00c7-48d2-9140-487d01ae8367" + emc_comm: + data_dir: cds_migrator_kit/rdm/data/committees/emc + tmp_dir: cds_migrator_kit/rdm/tmp/committees/emc + log_dir: cds_migrator_kit/rdm/log/committees/emc + extract: + dirpath: cds_migrator_kit/rdm/data/committees/emc + transform: + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "7b33dda1-a8b0-4d6b-808f-707fa3e63828" + isc_comm: + data_dir: cds_migrator_kit/rdm/data/committees/isc + tmp_dir: cds_migrator_kit/rdm/tmp/committees/isc + log_dir: cds_migrator_kit/rdm/log/committees/isc + extract: + dirpath: cds_migrator_kit/rdm/data/committees/isc + transform: + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "12019bcd-28f2-40ce-85f3-3bfff399e847" + istc_comm: + data_dir: cds_migrator_kit/rdm/data/committees/istc + tmp_dir: cds_migrator_kit/rdm/tmp/committees/istc + log_dir: cds_migrator_kit/rdm/log/committees/istc + extract: + dirpath: cds_migrator_kit/rdm/data/committees/istc + transform: + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "84c28df7-fbee-4332-883b-7b9b77480e1f" + isrc_comm: + data_dir: cds_migrator_kit/rdm/data/committees/isrc + tmp_dir: cds_migrator_kit/rdm/tmp/committees/isrc + log_dir: cds_migrator_kit/rdm/log/committees/isrc + extract: + dirpath: cds_migrator_kit/rdm/data/committees/isrc + transform: +
files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "2285af28-1808-4f11-aefb-64a5699940bb" + lepc_comm: + data_dir: cds_migrator_kit/rdm/data/committees/lepc + tmp_dir: cds_migrator_kit/rdm/tmp/committees/lepc + log_dir: cds_migrator_kit/rdm/log/committees/lepc + extract: + dirpath: cds_migrator_kit/rdm/data/committees/lepc + transform: + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "a276bb34-e6ba-47c8-aa18-0ade1eec8b1d" + nprc_comm: + data_dir: cds_migrator_kit/rdm/data/committees/nprc + tmp_dir: cds_migrator_kit/rdm/tmp/committees/nprc + log_dir: cds_migrator_kit/rdm/log/committees/nprc + extract: + dirpath: cds_migrator_kit/rdm/data/committees/nprc + transform: + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "5c7c772d-8513-49d3-bd43-ac9a8beec02d" + nsc_comm: + data_dir: cds_migrator_kit/rdm/data/committees/nsc + tmp_dir: cds_migrator_kit/rdm/tmp/committees/nsc + log_dir: cds_migrator_kit/rdm/log/committees/nsc + extract: + dirpath: cds_migrator_kit/rdm/data/committees/nsc + transform: + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "847936da-b488-4426-b9aa-3659fa473779" + phi_comm: + data_dir: cds_migrator_kit/rdm/data/committees/phi + tmp_dir: cds_migrator_kit/rdm/tmp/committees/phi + log_dir: cds_migrator_kit/rdm/log/committees/phi + extract: + dirpath: cds_migrator_kit/rdm/data/committees/phi + transform: + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + -
"c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "2d9b3d0c-930a-4231-8c3e-55feaf9150c0" + phi3_comm: + data_dir: cds_migrator_kit/rdm/data/committees/phi3 + tmp_dir: cds_migrator_kit/rdm/tmp/committees/phi3 + log_dir: cds_migrator_kit/rdm/log/committees/phi3 + extract: + dirpath: cds_migrator_kit/rdm/data/committees/phi3 + transform: + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "c35d2da3-8a47-420b-879e-be87b8b602c7" + psc_comm: + data_dir: cds_migrator_kit/rdm/data/committees/psc + tmp_dir: cds_migrator_kit/rdm/tmp/committees/psc + log_dir: cds_migrator_kit/rdm/log/committees/psc + extract: + dirpath: cds_migrator_kit/rdm/data/committees/psc + transform: + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "9d91ff75-573c-48db-a1c1-4f78b0a9dced" + pscc_comm: + data_dir: cds_migrator_kit/rdm/data/committees/pscc + tmp_dir: cds_migrator_kit/rdm/tmp/committees/pscc + log_dir: cds_migrator_kit/rdm/log/committees/pscc + extract: + dirpath: cds_migrator_kit/rdm/data/committees/pscc + transform: + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "dd2c1204-4508-43d6-a7ac-7ec5e1394523" + scc_comm: + data_dir: cds_migrator_kit/rdm/data/committees/scc + tmp_dir: cds_migrator_kit/rdm/tmp/committees/scc + log_dir: cds_migrator_kit/rdm/log/committees/scc + extract: + dirpath: cds_migrator_kit/rdm/data/committees/scc + transform: + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "28bf99b9-1e72-405a-b95a-828019831def" + scps_comm: + data_dir: cds_migrator_kit/rdm/data/committees/scps +
tmp_dir: cds_migrator_kit/rdm/tmp/committees/scps + log_dir: cds_migrator_kit/rdm/log/committees/scps + extract: + dirpath: cds_migrator_kit/rdm/data/committees/scps + transform: + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "dbcc1269-a481-4d40-8ef9-66305c831840" + spsc_comm: + data_dir: cds_migrator_kit/rdm/data/committees/spsc + tmp_dir: cds_migrator_kit/rdm/tmp/committees/spsc + log_dir: cds_migrator_kit/rdm/log/committees/spsc + extract: + dirpath: cds_migrator_kit/rdm/data/committees/spsc + transform: + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "0ce97700-d80e-4bbb-89c4-690be8f37817" + spslc_comm: + data_dir: cds_migrator_kit/rdm/data/committees/spslc + tmp_dir: cds_migrator_kit/rdm/tmp/committees/spslc + log_dir: cds_migrator_kit/rdm/log/committees/spslc + extract: + dirpath: cds_migrator_kit/rdm/data/committees/spslc + transform: + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "4db1a084-4280-401d-a260-54229944af1e" + tcc_comm: + data_dir: cds_migrator_kit/rdm/data/committees/tcc + tmp_dir: cds_migrator_kit/rdm/tmp/committees/tcc + log_dir: cds_migrator_kit/rdm/log/committees/tcc + extract: + dirpath: cds_migrator_kit/rdm/data/committees/tcc/dump/ + transform: + files_dump_dir: cds_migrator_kit/rdm/data/committees/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "2ca82423-148a-42b5-86ff-03336b961c8e" + antares: + data_dir: cds_migrator_kit/rdm/data/former_exp/antares + tmp_dir: cds_migrator_kit/rdm/tmp/former_exp/antares + log_dir: cds_migrator_kit/rdm/log/former_exp/antares + extract: + dirpath:
cds_migrator_kit/rdm/data/former_exp/antares + transform: + files_dump_dir: cds_migrator_kit/rdm/data/former_exp/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "e2d2ed70-8a3c-453b-aa5b-1fa985f0906b" + antares_restr: + data_dir: cds_migrator_kit/rdm/data/former_exp/antares_restr + tmp_dir: cds_migrator_kit/rdm/tmp/former_exp/antares_restr + log_dir: cds_migrator_kit/rdm/log/former_exp/antares_restr + restricted: "True" + extract: + dirpath: cds_migrator_kit/rdm/data/former_exp/antares_restr + transform: + files_dump_dir: cds_migrator_kit/rdm/data/former_exp/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "e2d2ed70-8a3c-453b-aa5b-1fa985f0906b" + dirac: + data_dir: cds_migrator_kit/rdm/data/former_exp/dirac + tmp_dir: cds_migrator_kit/rdm/tmp/former_exp/dirac + log_dir: cds_migrator_kit/rdm/log/former_exp/dirac + extract: + dirpath: cds_migrator_kit/rdm/data/former_exp/dirac + transform: + files_dump_dir: cds_migrator_kit/rdm/data/former_exp/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "6a289642-5378-4daf-87b5-bb58af00487a" + harp_cdp: + data_dir: cds_migrator_kit/rdm/data/former_exp/harp_cdp + tmp_dir: cds_migrator_kit/rdm/tmp/former_exp/harp_cdp + log_dir: cds_migrator_kit/rdm/log/former_exp/harp_cdp + extract: + dirpath: cds_migrator_kit/rdm/data/former_exp/harp_cdp + transform: + files_dump_dir: cds_migrator_kit/rdm/data/former_exp/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "2541c76f-6c9b-415b-9a47-e814ec01f576" + imxgam: + data_dir: cds_migrator_kit/rdm/data/former_exp/imXgam + tmp_dir: cds_migrator_kit/rdm/tmp/former_exp/imXgam + log_dir: cds_migrator_kit/rdm/log/former_exp/imXgam + restricted: "True" + extract: + dirpath:
cds_migrator_kit/rdm/data/former_exp/imXgam + transform: + files_dump_dir: cds_migrator_kit/rdm/data/former_exp/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "88a5cdf4-974a-44d4-b145-d19ee6346bb8" + lcd_restr: + data_dir: cds_migrator_kit/rdm/data/former_exp/lcd_restr + tmp_dir: cds_migrator_kit/rdm/tmp/former_exp/lcd_restr + log_dir: cds_migrator_kit/rdm/log/former_exp/lcd_restr + restricted: "True" + extract: + dirpath: cds_migrator_kit/rdm/data/former_exp/lcd_restr + transform: + files_dump_dir: cds_migrator_kit/rdm/data/former_exp/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "adc02716-780b-4bba-8e89-6316c9a11cf0" + lcd: + data_dir: cds_migrator_kit/rdm/data/former_exp/lcd + tmp_dir: cds_migrator_kit/rdm/tmp/former_exp/lcd + log_dir: cds_migrator_kit/rdm/log/former_exp/lcd + extract: + dirpath: cds_migrator_kit/rdm/data/former_exp/lcd + transform: + files_dump_dir: cds_migrator_kit/rdm/data/former_exp/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "adc02716-780b-4bba-8e89-6316c9a11cf0" + re29: + data_dir: cds_migrator_kit/rdm/data/former_exp/re29 + tmp_dir: cds_migrator_kit/rdm/tmp/former_exp/re29 + log_dir: cds_migrator_kit/rdm/log/former_exp/re29 + extract: + dirpath: cds_migrator_kit/rdm/data/former_exp/re29 + transform: + files_dump_dir: cds_migrator_kit/rdm/data/former_exp/files/ missing_users: cds_migrator_kit/rdm/data/users communities_ids: - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" - former_exp_restr: - data_dir: cds_migrator_kit/rdm/data/former_experiments/restricted - tmp_dir: cds_migrator_kit/rdm/tmp/former_experiments/restricted - log_dir: cds_migrator_kit/rdm/log/former_experiments/restricted + - "aff67e22-9027-4401-a16f-1aaac2cd2c27" + re29_restr: + data_dir: cds_migrator_kit/rdm/data/former_exp/re29_restr + tmp_dir:
cds_migrator_kit/rdm/tmp/former_exp/re29_restr + log_dir: cds_migrator_kit/rdm/log/former_exp/re29_restr restricted: "True" extract: - dirpath: cds_migrator_kit/rdm/data/former_experiments/restricted/dump/ + dirpath: cds_migrator_kit/rdm/data/former_exp/re29_restr + transform: + files_dump_dir: cds_migrator_kit/rdm/data/former_exp/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "aff67e22-9027-4401-a16f-1aaac2cd2c27" + ua2: + data_dir: cds_migrator_kit/rdm/data/former_exp/ua2 + tmp_dir: cds_migrator_kit/rdm/tmp/former_exp/ua2 + log_dir: cds_migrator_kit/rdm/log/former_exp/ua2 + extract: + dirpath: cds_migrator_kit/rdm/data/former_exp/ua2 + transform: + files_dump_dir: cds_migrator_kit/rdm/data/former_exp/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "4040d2c7-7648-4f1a-81fa-7112fec8d130" + ua4: + data_dir: cds_migrator_kit/rdm/data/former_exp/ua4 + tmp_dir: cds_migrator_kit/rdm/tmp/former_exp/ua4 + log_dir: cds_migrator_kit/rdm/log/former_exp/ua4 + extract: + dirpath: cds_migrator_kit/rdm/data/former_exp/ua4 + transform: + files_dump_dir: cds_migrator_kit/rdm/data/former_exp/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "1b9239a7-5938-44e3-8ea8-2c530627f659" + ua5: + data_dir: cds_migrator_kit/rdm/data/former_exp/ua5 + tmp_dir: cds_migrator_kit/rdm/tmp/former_exp/ua5 + log_dir: cds_migrator_kit/rdm/log/former_exp/ua5 + extract: + dirpath: cds_migrator_kit/rdm/data/former_exp/ua5 + transform: + files_dump_dir: cds_migrator_kit/rdm/data/former_exp/files/ + missing_users: cds_migrator_kit/rdm/data/users + communities_ids: + - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "7996b881-2420-4791-ac17-00cf3c1e7196" + ua8: + data_dir: cds_migrator_kit/rdm/data/former_exp/ua8 + tmp_dir: cds_migrator_kit/rdm/tmp/former_exp/ua8 + log_dir:
cds_migrator_kit/rdm/log/former_exp/ua8 + extract: + dirpath: cds_migrator_kit/rdm/data/former_exp/ua8 transform: - files_dump_dir: cds_migrator_kit/rdm/data/former_experiments/restricted/files/ + files_dump_dir: cds_migrator_kit/rdm/data/former_exp/files/ missing_users: cds_migrator_kit/rdm/data/users communities_ids: - "c2c46ab3-5fb4-4d86-83c6-5d9dc8392d6f" + - "2dbeb56c-b4ab-4cad-8d82-5a1f90499bd7" fap: data_dir: cds_migrator_kit/rdm/data/fap tmp_dir: cds_migrator_kit/rdm/tmp/fap diff --git a/cds_migrator_kit/transform/dumper.py b/cds_migrator_kit/transform/dumper.py index 3cf09faa..576a570f 100644 --- a/cds_migrator_kit/transform/dumper.py +++ b/cds_migrator_kit/transform/dumper.py @@ -10,7 +10,9 @@ import arrow from cds_dojson.marc21.utils import create_record +from cds_dojson.exceptions import ModelMissingException, MultipleModelsException +from cds_migrator_kit.errors import MultipleModelsMatched, RecordModelMissing from cds_migrator_kit.transform import migrator_marc21 from cds_migrator_kit.transform.errors import LossyConversion @@ -74,7 +76,12 @@ def _prepare_revision(self, data): # exception handlers are passed in this way to avoid overriding # .do method implementation - json_converted_record = self.dojson_model.do(marc_record) + try: + json_converted_record = self.dojson_model.do(marc_record) + except MultipleModelsException as e: + raise MultipleModelsMatched(str(e)) from e + except ModelMissingException as e: + raise RecordModelMissing(str(e)) from e missing = self.dojson_model.missing(marc_record) if missing and self.raise_on_missing_rules: diff --git a/cds_migrator_kit/users/api.py b/cds_migrator_kit/users/api.py index cf940b39..7a96b5a9 100644 --- a/cds_migrator_kit/users/api.py +++ b/cds_migrator_kit/users/api.py @@ -40,10 +40,10 @@ def create_invenio_user(self, email, username): return user except IntegrityError as e: db.session.rollback() - email_username = email.split("@")[0] + email_username, _, domain = email.partition("@") user = User( email=email, -
username=f"duplicated_{username}_{email_username}", + username=f"duplicated_{username}_{email_username}[at]{domain}", active=False, ) db.session.add(user) diff --git a/setup.cfg b/setup.cfg index 07732861..54169023 100644 --- a/setup.cfg +++ b/setup.cfg @@ -75,7 +75,8 @@ cds_migrator_kit.migrator.models = mous = cds_migrator_kit.rdm.records.transform.models.mous:mous_model bulletin_issue = cds_migrator_kit.rdm.records.transform.models.bulletin_issue:bull_issue_model yellow_rep = cds_migrator_kit.rdm.records.transform.models.yellow_reports:yellow_issue_model - books = cds_migrator_kit.rdm.records.transform.models.books:book_model +; putting the model on hold - it conflicts with other migrations +; books = cds_migrator_kit.rdm.records.transform.models.books:book_model courier = cds_migrator_kit.rdm.records.transform.models.courier:courier_issue_model beams = cds_migrator_kit.rdm.records.transform.models.beams:beams_model it = cds_migrator_kit.rdm.records.transform.models.it:it_model