Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cds_migrator_kit/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ class RecordModelMissing(CDSMigrationException):
description = "[Record did not match any available model]"


class MultipleModelsMatched(CDSMigrationException):
    """Error for a record that matched more than one model."""

    description = "[Record matched multiple models]"

class UnexpectedValue(CDSMigrationException):
"""The corresponding value is unexpected."""

Expand Down
2 changes: 1 addition & 1 deletion cds_migrator_kit/rdm/migration_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ def _(x): # needed to avoid start time failure with lazy strings
},
"inis": {
"label": _("INIS"),
"validator": schemes.is_inspire,
"validator": schemes.is_inis,
"datacite": "INIS",
},
"indico": {
Expand Down
5 changes: 4 additions & 1 deletion cds_migrator_kit/rdm/records/transform/models/research.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class ResearchModel(CdsOverdo):
"300__a", # number of pages
"340__a", # TODO ignore material?
"540__3", # TODO still ignore the material of the license?
"540__9", # TODO still ignore the material of the license?
"542__3", # TODO still ignore the material of the license?
"595__i", # TODO ??
"695__e", # some inspire tag
Expand All @@ -50,10 +51,12 @@ class ResearchModel(CdsOverdo):
"773__x", # INSPIRE publication note
"773__t", # INSPIRE publication note
"773__0", # from SIS: can be ignored
"773__o", # from SIS: can be ignored
"773__z", # from SIS: can be ignored
"8564_8", # file id
"8564_s", # bibdoc id
"8564_x", # icon thumbnails sizes
# "8564_y", # file description - done by files dump
"8564_y", # file description - done by files dump, sometimes these are used for open access calculation
"8564_w", # system field
"913__y", # citation
"913__v", # citation
Expand Down
3 changes: 2 additions & 1 deletion cds_migrator_kit/rdm/records/transform/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
MissingRequiredField,
RecordFlaggedCuration,
RestrictedFileDetected,
UnexpectedValue,
UnexpectedValue, MultipleModelsMatched,
)
from cds_migrator_kit.rdm.migration_config import (
RDM_RECORDS_IDENTIFIERS_SCHEMES,
Expand Down Expand Up @@ -847,6 +847,7 @@ def _transform(self, entry):
UnexpectedValue,
ManualImportRequired,
MissingRequiredField,
MultipleModelsMatched
) as e:
migration_logger.add_log(e, record=entry)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,6 @@ def validate_subject_scheme(subject_scheme, subfield, key):

is_controlled_subject = key == "65017" and (scheme in CONTROLLED_SUBJECTS_SCHEMES)

# Drop other subjects
if val_a.lower().strip() == "other subjects":
raise IgnoreKey("subjects")

if type(val_a) is tuple:
# sometimes keywords are stuck in one tag, so they come out as a tuple
s_values = val_a
Expand All @@ -176,7 +172,7 @@ def validate_subject_scheme(subject_scheme, subfield, key):
raise IgnoreKey("subjects")
else:
subject_value = val_a.strip()
if subject_value.lower() == "xx":
if subject_value.lower() in ["xx", "other subjects"]:
raise IgnoreKey("subjects")
_subjects = self.get("subjects", [])
# invalid schema = euproject info scheme = scheme
Expand Down Expand Up @@ -314,14 +310,16 @@ def report_number(self, key, value):
if not identifier:
if re.findall(udc_pattern, scheme):
raise IgnoreKey("identifiers")
elif scheme.startswith("CM-"):
elif scheme.upper().startswith("CM-"):
# barcode, to drop
raise IgnoreKey("identifiers")
elif scheme.upper().startswith("P00"):
# barcode, to drop
raise IgnoreKey("identifiers")
elif scheme.upper() == "CERN LIBRARY":
raise IgnoreKey("identifiers")
elif scheme.upper().startswith("B00"):
raise IgnoreKey("identifiers")
elif scheme.startswith("SCOO"):
identifier = scheme
scheme = "other"
Expand Down Expand Up @@ -818,6 +816,10 @@ def related_identifiers_787(self, key, value):
"relation_type": {"id": "references"},
"resource_type": {"id": "publication-conferencepaper"},
},
"article":{
"relation_type": {"id": "references"},
"resource_type": {"id": "publication-article"},
}
}

if recid:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ def resource_type(self, key, value):
"opal_papers",
"aleph_papers",
"ps212_papers",
"slintnote",
]

committees = {
Expand Down Expand Up @@ -477,6 +478,7 @@ def resource_type(self, key, value):
for i, v in enumerate(
[
"conferencepaper",
"proceedings",
"bookchapter",
"itcerntalk",
"antarescerntalk",
Expand Down Expand Up @@ -513,6 +515,7 @@ def resource_type(self, key, value):
mapping = {
"preprint": {"id": "publication-preprint"},
"conferencepaper": {"id": "publication-conferencepaper"},
"proceedings": {"id": "publication-conferenceproceeding"},
"article": {"id": "publication-article"},
"note": {"id": "publication-technicalnote"},
"lcd-notes": {"id": "publication-technicalnote"},
Expand Down
Loading
Loading