From 19148a048d4a8f8afa753974fb76f5d1b2789b67 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 11 May 2026 16:42:37 -0400 Subject: [PATCH 01/14] Update .gitignore to include imap_processing/_version.py for local builds --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index bdc9cfa1d..78f24861b 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,11 @@ share/python-wheels/ MANIFEST .python-version +# Generated by poetry-dynamic-versioning during local installs/builds. +# Keep it untracked so environment-specific version substitutions do not dirty +# the worktree on every setup or test run. +imap_processing/_version.py + # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. From faf20af0a9c9cbc05373771efd4f87d989f7a056 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 11 May 2026 17:35:25 -0400 Subject: [PATCH 02/14] Revert "Update .gitignore to include imap_processing/_version.py for local builds" This reverts commit 19148a048d4a8f8afa753974fb76f5d1b2789b67. --- .gitignore | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.gitignore b/.gitignore index 78f24861b..bdc9cfa1d 100644 --- a/.gitignore +++ b/.gitignore @@ -37,11 +37,6 @@ share/python-wheels/ MANIFEST .python-version -# Generated by poetry-dynamic-versioning during local installs/builds. -# Keep it untracked so environment-specific version substitutions do not dirty -# the worktree on every setup or test run. -imap_processing/_version.py - # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
From c2d1cc7c07510b07a7cf297b91c390274a51f86d Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 11 May 2026 16:42:37 -0400 Subject: [PATCH 03/14] Update .gitignore to include imap_processing/_version.py for local builds --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index bdc9cfa1d..78f24861b 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,11 @@ share/python-wheels/ MANIFEST .python-version +# Generated by poetry-dynamic-versioning during local installs/builds. +# Keep it untracked so environment-specific version substitutions do not dirty +# the worktree on every setup or test run. +imap_processing/_version.py + # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. From 19c6c369129a93541aea28097b9bdb1068697ec6 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 11 May 2026 17:39:37 -0400 Subject: [PATCH 04/14] fix(swe): correct L2 acq_duration FILLVAL for CDF_UINT4 SWE L2 writes acq_duration as CDF_UINT4, but the L2 variable attribute template still used the signed int64 fill sentinel. When the CDF is written, cdflib coerces FILLVAL to the variable's actual CDF type, so the negative sentinel wraps to 0 for the emitted UINT4 field. That produces a non-standard L2 CDF and causes SPDF to report: acq_duration FILLVAL value of '0' is non-standard. The recommended value is '4294967295'. Fix the L2 metadata template to use the correct UINT4 fill value (4294967295) for acq_duration. Also extend the existing SWE L2 end-to-end test to inspect the written CDF directly with cdflib and verify that acq_duration is emitted as CDF_UINT4 with the expected FILLVAL. This gives us a regression test on the actual serialized CDF metadata, not just the in-memory dataset. This change is intentionally scoped to SWE L2. L1 metadata is unchanged. 
--- imap_processing/cdf/config/imap_swe_l2_variable_attrs.yaml | 2 +- imap_processing/tests/swe/test_swe_l2.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/imap_processing/cdf/config/imap_swe_l2_variable_attrs.yaml b/imap_processing/cdf/config/imap_swe_l2_variable_attrs.yaml index c61e8494b..5cc3fd220 100644 --- a/imap_processing/cdf/config/imap_swe_l2_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_swe_l2_variable_attrs.yaml @@ -291,7 +291,7 @@ acq_duration: DICT_KEY: SPASE>Support>SupportQuantity:Temporal,Qualifier:Array DISPLAY_TYPE: spectrogram FIELDNAM: Acquisition Duration - FILLVAL: -9223372036854775808 + FILLVAL: 4294967295 FORMAT: I10 LABL_PTR_1: esa_step_label LABL_PTR_2: spin_sector_label diff --git a/imap_processing/tests/swe/test_swe_l2.py b/imap_processing/tests/swe/test_swe_l2.py index f86c1c380..983ac316e 100644 --- a/imap_processing/tests/swe/test_swe_l2.py +++ b/imap_processing/tests/swe/test_swe_l2.py @@ -1,5 +1,6 @@ from unittest.mock import patch +import cdflib import numpy as np import pytest import xarray as xr @@ -378,6 +379,11 @@ def get_file_paths_side_effect(descriptor): l2_dataset.attrs["Data_version"] = "002" l2_cdf_filepath = write_cdf(l2_dataset) assert l2_cdf_filepath.name == "imap_swe_l2_sci_20240510_v002.cdf" + cdf_file = cdflib.CDF(l2_cdf_filepath) + acq_duration_info = cdf_file.varinq("acq_duration") + acq_duration_attrs = cdf_file.varattsget("acq_duration") + assert acq_duration_info.Data_Type_Description == "CDF_UINT4" + assert acq_duration_attrs["FILLVAL"] == np.uint32(4294967295) # --------- sector validation-------- sector_psd_data = l2_dataset["phase_space_density_spin_sector"].data From 3acd5ed947e3af46263791bcf30f3c508e63f10b Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 13 May 2026 10:41:27 -0400 Subject: [PATCH 05/14] fix(istp): align emitted CDF FILLVAL metadata with actual variable types Several products were writing variables with CDF types that did not match the 
configured FILLVAL and FORMAT metadata. The science values were otherwise fine, but the emitted CDFs failed SPDF/ISTP checks because the metadata still described the pre-coercion or intended type rather than the serialized type. Update the staged products so their attrs match what is actually written: - CoDICE lo-direct-events - change data_quality to use the UINT2 fill value - change spin_sector to use floating-point fill/format because the written variable is CDF_DOUBLE after invalid positions are represented as NaN - CoDICE hi-direct-events - change data_quality to use the UINT2 fill value - CoDICE lo-sw-species / lo-nsw-species - restore nso_esa_step and nso_spin_sector to uint8 before write-out - replace non-finite values with the integer fill sentinel and recast to CDF_UINT1 semantics instead of leaving these support vars widened to float - HIT L2 - correct dynamic_threshold_state FILLVAL from the signed sentinel to the UINT1 sentinel expected by the emitted CDF - IDEX - treat spin_phase as floating-point metadata in the attr template so its FILLVAL and FORMAT match the serialized CDF_DOUBLE variable - MAG L2 - correct vector fill metadata to use the standard floating-point fill value used by the emitted magnetic field vectors - SWAPI - correct esa_energy FILLVAL and FORMAT to the floating-point form used by the written support variable Also add CDF-level regression coverage for the products exercised in this change: - CoDICE tests now inspect the written CDF metadata for nso_esa_step, nso_spin_sector, lo-direct-events spin_sector, and direct-events data_quality - HIT tests now verify dynamic_threshold_state is written as CDF_UINT1 with FILLVAL 255 - IDEX tests now verify spin_phase is written as CDF_DOUBLE with the expected floating-point fill value This change is intentionally focused on the staged fill-value/type mismatches. It does not include the separate SWE fix or the remaining CoDICE epoch_delta_* cleanup. 
--- ...ce_l2-hi-direct-events_variable_attrs.yaml | 2 +- ...ce_l2-lo-direct-events_variable_attrs.yaml | 6 ++--- .../config/imap_hit_l2_variable_attrs.yaml | 3 +-- .../config/imap_idex_l1b_variable_attrs.yaml | 5 ++-- .../config/imap_mag_l2_variable_attrs.yaml | 2 +- .../cdf/config/imap_swapi_variable_attrs.yaml | 4 ++-- imap_processing/codice/codice_l2.py | 16 +++++++++++++ .../tests/codice/test_codice_l2.py | 23 ++++++++++++++++++- imap_processing/tests/hit/test_hit_l2.py | 9 ++++++++ imap_processing/tests/idex/conftest.py | 2 +- imap_processing/tests/idex/test_idex_l2a.py | 8 ++++++- 11 files changed, 65 insertions(+), 15 deletions(-) diff --git a/imap_processing/cdf/config/imap_codice_l2-hi-direct-events_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l2-hi-direct-events_variable_attrs.yaml index e7e281457..65f43bba3 100644 --- a/imap_processing/cdf/config/imap_codice_l2-hi-direct-events_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l2-hi-direct-events_variable_attrs.yaml @@ -84,7 +84,7 @@ data_quality: DEPEND_1: priority DICT_KEY: SPASE>Support>SupportQuantity:DataQuality FIELDNAM: Data Quality - FILLVAL: *uint8_fillval + FILLVAL: *uint16_fillval FORMAT: I3 LABLAXIS: Data Quality LABL_PTR_1: priority_label diff --git a/imap_processing/cdf/config/imap_codice_l2-lo-direct-events_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l2-lo-direct-events_variable_attrs.yaml index 35ffbdf76..5db436769 100644 --- a/imap_processing/cdf/config/imap_codice_l2-lo-direct-events_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l2-lo-direct-events_variable_attrs.yaml @@ -82,7 +82,7 @@ data_quality: DEPEND_1: priority DICT_KEY: SPASE>Support>SupportQuantity:DataQuality FIELDNAM: Data Quality - FILLVAL: *uint8_fillval + FILLVAL: *uint16_fillval FORMAT: I3 LABL_PTR_1: priority_label SCALETYP: linear @@ -260,8 +260,8 @@ spin_sector: DEPEND_2: event_num DICT_KEY: SPASE>Support>SupportQuantity:Positional FIELDNAM: Spin Sector 
Index - FILLVAL: *uint8_fillval - FORMAT: I2 + FILLVAL: *real_fillval + FORMAT: F8.1 LABLAXIS: Spin Sector LABL_PTR_1: priority_label LABL_PTR_2: event_num_label diff --git a/imap_processing/cdf/config/imap_hit_l2_variable_attrs.yaml b/imap_processing/cdf/config/imap_hit_l2_variable_attrs.yaml index 0fa85ae79..b782d93ee 100644 --- a/imap_processing/cdf/config/imap_hit_l2_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_hit_l2_variable_attrs.yaml @@ -331,7 +331,7 @@ dynamic_threshold_state: DICT_KEY: SPASE>Support>SupportQuantity:InstrumentMode DISPLAY_TYPE: time_series FIELDNAM: Dynamic threshold state - FILLVAL: -128 + FILLVAL: 255 FORMAT: I1 LABLAXIS: State SCALEMAX: 1000 @@ -1997,4 +1997,3 @@ fe_total_uncert_minus_macropixel: - diff --git a/imap_processing/cdf/config/imap_idex_l1b_variable_attrs.yaml b/imap_processing/cdf/config/imap_idex_l1b_variable_attrs.yaml index 0739e5b1f..b18f6c54d 100644 --- a/imap_processing/cdf/config/imap_idex_l1b_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_idex_l1b_variable_attrs.yaml @@ -483,8 +483,8 @@ spin_phase: CATDESC: IMAP Spin Phase DICT_KEY: SPASE>Support>SupportQuantity:SpinPhase FIELDNAM: Spin Phase - FILLVAL: *int_fillval - FORMAT: I3 + FILLVAL: *double_fillval + FORMAT: F8.3 LABLAXIS: Spin Phase UNITS: Degrees VALIDMAX: 360 @@ -499,4 +499,3 @@ solar_longitude: UNITS: Degrees VALIDMAX: 180 VALIDMIN: -180 - diff --git a/imap_processing/cdf/config/imap_mag_l2_variable_attrs.yaml b/imap_processing/cdf/config/imap_mag_l2_variable_attrs.yaml index bb193c828..6d376c8e4 100644 --- a/imap_processing/cdf/config/imap_mag_l2_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_mag_l2_variable_attrs.yaml @@ -49,7 +49,7 @@ vector_attrs: &vectors_default DEPEND_1: direction DICT_KEY: "SPASE>Field>FieldQuantity:Magnetic,Qualifier:Vector,CoordinateSystemName:DSRF,CoordinateRepresentation:Cartesian" FIELDNAM: Magnetic Field Vector - FILLVAL: 9223372036854775807 + FILLVAL: -1.0e31 FORMAT: F12.5 LABL_PTR_1: 
direction_label diff --git a/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml b/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml index 30b0a3037..571947887 100644 --- a/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml @@ -107,8 +107,8 @@ esa_energy: DEPEND_1: esa_step DICT_KEY: SPASE>Particle>ParticleType:Ion,ParticleQuantity:EnergyPerCharge FIELDNAM: ESA Energy - FILLVAL: -9223372036854775808 - FORMAT: I5 + FILLVAL: -1.0000000E+31 + FORMAT: F8.1 LABLAXIS: Energy(eV) LABL_PTR_1: esa_step_label SCALETYP: linear diff --git a/imap_processing/codice/codice_l2.py b/imap_processing/codice/codice_l2.py index 656727807..3e7e2ee6c 100644 --- a/imap_processing/codice/codice_l2.py +++ b/imap_processing/codice/codice_l2.py @@ -568,6 +568,22 @@ def process_lo_species_intensity( for species in species_list: dataset[species].data[half_spin_boundary] = np.nan + for var in ["nso_esa_step", "nso_spin_sector"]: + if var in dataset: + fillval = dataset[var].attrs["FILLVAL"] + restored_values = dataset[var].data.astype(np.float64, copy=True) + restored_values = np.nan_to_num( + restored_values, nan=fillval, posinf=fillval, neginf=fillval + ) + restored_values = np.clip(np.rint(restored_values), 0, 255).astype( + np.uint8 + ) + dataset[var] = xr.DataArray( + restored_values, + dims=dataset[var].dims, + attrs=dataset[var].attrs, + ) + return dataset diff --git a/imap_processing/tests/codice/test_codice_l2.py b/imap_processing/tests/codice/test_codice_l2.py index ae738e00b..4e7a6c7e4 100644 --- a/imap_processing/tests/codice/test_codice_l2.py +++ b/imap_processing/tests/codice/test_codice_l2.py @@ -3,6 +3,7 @@ from unittest import mock from unittest.mock import MagicMock, patch +import cdflib import numpy as np import pandas as pd import pytest @@ -453,7 +454,13 @@ def test_codice_l2_sw_species_intensity(mock_get_file_paths, codice_lut_path): ) processed_2_ds.attrs["Data_version"] = "001" assert 
processed_2_ds.attrs["Logical_source"] == "imap_codice_l2_lo-sw-species" - write_cdf(processed_2_ds) + cdf_file_path = write_cdf(processed_2_ds) + cdf_file = cdflib.CDF(cdf_file_path) + for var in ["nso_esa_step", "nso_spin_sector"]: + var_info = cdf_file.varinq(var) + var_attrs = cdf_file.varattsget(var) + assert var_info.Data_Type_Description == "CDF_UINT1" + assert var_attrs["FILLVAL"] == np.uint8(255) @patch("imap_data_access.processing_input.ProcessingInputCollection.get_file_paths") @@ -666,6 +673,15 @@ def test_codice_l2_lo_de(mock_get_file_paths, codice_lut_path): file = write_cdf(processed_l2_ds) errors = CDFValidator().validate(file) assert not errors + cdf_file = cdflib.CDF(file) + spin_sector_info = cdf_file.varinq("spin_sector") + spin_sector_attrs = cdf_file.varattsget("spin_sector") + data_quality_info = cdf_file.varinq("data_quality") + data_quality_attrs = cdf_file.varattsget("data_quality") + assert spin_sector_info.Data_Type_Description == "CDF_DOUBLE" + assert np.isclose(spin_sector_attrs["FILLVAL"], np.float64(-1.0e31)) + assert data_quality_info.Data_Type_Description == "CDF_UINT2" + assert data_quality_attrs["FILLVAL"] == np.uint16(65535) load_cdf(file) @@ -719,4 +735,9 @@ def test_codice_l2_hi_de(mock_get_file_paths, codice_lut_path): file = write_cdf(processed_l2_ds) errors = CDFValidator().validate(file) assert not errors + cdf_file = cdflib.CDF(file) + data_quality_info = cdf_file.varinq("data_quality") + data_quality_attrs = cdf_file.varattsget("data_quality") + assert data_quality_info.Data_Type_Description == "CDF_UINT2" + assert data_quality_attrs["FILLVAL"] == np.uint16(65535) load_cdf(file) diff --git a/imap_processing/tests/hit/test_hit_l2.py b/imap_processing/tests/hit/test_hit_l2.py index 85164726b..5121415e3 100644 --- a/imap_processing/tests/hit/test_hit_l2.py +++ b/imap_processing/tests/hit/test_hit_l2.py @@ -1,11 +1,13 @@ from unittest.mock import Mock, patch +import cdflib import numpy as np import pandas as pd import pytest 
import xarray as xr from imap_processing import imap_module_directory +from imap_processing.cdf.utils import write_cdf from imap_processing.hit.l1a import hit_l1a from imap_processing.hit.l1b.hit_l1b import ( SUMMED_PARTICLE_ENERGY_RANGE_MAPPING, @@ -883,3 +885,10 @@ def test_hit_l2( dependencies[dataset_key], ancillary_dependencies[ancillary_key] ) assert l2_dataset.attrs["Logical_source"] == expected_logical_source + l2_dataset.attrs["Data_version"] = "001" + l2_cdf_filepath = write_cdf(l2_dataset) + cdf_file = cdflib.CDF(l2_cdf_filepath) + dynamic_threshold_info = cdf_file.varinq("dynamic_threshold_state") + dynamic_threshold_attrs = cdf_file.varattsget("dynamic_threshold_state") + assert dynamic_threshold_info.Data_Type_Description == "CDF_UINT1" + assert dynamic_threshold_attrs["FILLVAL"] == np.uint8(255) diff --git a/imap_processing/tests/idex/conftest.py b/imap_processing/tests/idex/conftest.py index d2adb16d7..91188ac43 100644 --- a/imap_processing/tests/idex/conftest.py +++ b/imap_processing/tests/idex/conftest.py @@ -143,7 +143,7 @@ def get_spice_data_side_effect_func(l1a_ds, idex_attrs): spin_phase_angles = xr.DataArray( name="spin_phase", dims=["epoch"], - data=np.random.randint(0, 360, len(l1a_ds.epoch)), + data=np.random.uniform(0.0, 360.0, len(l1a_ds.epoch)), attrs=idex_attrs.get_variable_attributes("spin_phase"), ) longitude = xr.DataArray( diff --git a/imap_processing/tests/idex/test_idex_l2a.py b/imap_processing/tests/idex/test_idex_l2a.py index ba04f2bd9..0d85cf9c5 100644 --- a/imap_processing/tests/idex/test_idex_l2a.py +++ b/imap_processing/tests/idex/test_idex_l2a.py @@ -2,6 +2,7 @@ from unittest import mock +import cdflib import numpy as np import pandas as pd import pytest @@ -41,7 +42,7 @@ def l2a_dataset( """ idex_attrs = get_idex_attrs("l1b") spin_phase_angles = xr.DataArray( - np.random.randint(0, 360, len(l1b_dataset.epoch)), + np.random.uniform(0.0, 360.0, len(l1b_dataset.epoch)), dims="epoch", 
attrs=idex_attrs.get_variable_attributes("spin_phase"), ) @@ -83,6 +84,11 @@ def test_l2a_logical_source_and_cdf(l2a_dataset: xr.Dataset): file_name = write_cdf(l2a_dataset) assert file_name.exists() assert file_name.name == "imap_idex_l2a_sci-1week_20231218_v999.cdf" + cdf_file = cdflib.CDF(file_name) + spin_phase_info = cdf_file.varinq("spin_phase") + spin_phase_attrs = cdf_file.varattsget("spin_phase") + assert spin_phase_info.Data_Type_Description == "CDF_DOUBLE" + assert np.isclose(spin_phase_attrs["FILLVAL"], np.float64(-1.0e31)) expected_vars = [ "tof_snr", From d3a64b62344a5b81073efbc1710d4a60180b065e Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 13 May 2026 10:42:00 -0400 Subject: [PATCH 06/14] fix(tests): add CDF FILLVAL assertions for mag_l2 and swapi_l2 tests --- imap_processing/tests/mag/test_mag_l2.py | 9 +++++++++ imap_processing/tests/swapi/test_swapi_l2.py | 6 ++++++ 2 files changed, 15 insertions(+) diff --git a/imap_processing/tests/mag/test_mag_l2.py b/imap_processing/tests/mag/test_mag_l2.py index 12f26ffd1..51aa43280 100644 --- a/imap_processing/tests/mag/test_mag_l2.py +++ b/imap_processing/tests/mag/test_mag_l2.py @@ -1,9 +1,11 @@ from unittest.mock import patch +import cdflib import numpy as np import pytest import xarray as xr +from imap_processing.cdf.utils import write_cdf from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes from imap_processing.mag.constants import FILLVAL, DataMode from imap_processing.mag.l2.mag_l2 import mag_l2, retrieve_matrix_from_l2_calibration @@ -140,6 +142,13 @@ def test_mag_l2(norm_dataset, mag_test_l2_data): for i, dataset in enumerate(l2): assert expected_frames[i].var_name in dataset.data_vars assert expected_frames[i].name in dataset.attrs["Data_type"] + dataset.attrs["Data_version"] = "001" + cdf_filepath = write_cdf(dataset) + cdf_file = cdflib.CDF(cdf_filepath) + vector_info = cdf_file.varinq(expected_frames[i].var_name) + vector_attrs = 
cdf_file.varattsget(expected_frames[i].var_name) + assert vector_info.Data_Type_Description == "CDF_FLOAT" + assert np.isclose(vector_attrs["FILLVAL"], np.float32(-1.0e31)) def test_mag_l2_some_epochs_not_in_spice(norm_dataset, mag_test_l2_data): diff --git a/imap_processing/tests/swapi/test_swapi_l2.py b/imap_processing/tests/swapi/test_swapi_l2.py index 07a10cf38..a78f3bedc 100644 --- a/imap_processing/tests/swapi/test_swapi_l2.py +++ b/imap_processing/tests/swapi/test_swapi_l2.py @@ -1,6 +1,7 @@ import json from unittest.mock import patch +import cdflib import numpy as np import pandas as pd import pytest @@ -125,6 +126,11 @@ def second_get_file_paths_side_effect(descriptor): ) l2_cdf = write_cdf(l2_dataset) assert l2_cdf.name == "imap_swapi_l2_sci_20240924_v999.cdf" + cdf_file = cdflib.CDF(l2_cdf) + esa_energy_info = cdf_file.varinq("esa_energy") + esa_energy_attrs = cdf_file.varattsget("esa_energy") + assert esa_energy_info.Data_Type_Description == "CDF_DOUBLE" + assert np.isclose(esa_energy_attrs["FILLVAL"], np.float64(-1.0e31)) # Test uncertainty variables are as expected np.testing.assert_array_equal( From e6502001594a75d6c788bfadf30e67167faaf4b9 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 13 May 2026 11:30:14 -0400 Subject: [PATCH 07/14] fix(glows): preserve L2 histogram variable dtypes during CDF padding GLOWS L2 re-expands daily lightcurve arrays back to STANDARD_BIN_COUNT before writing the final CDF. That padding step was using `np.full(..., fillval)` without controlling the dtype, which let NumPy choose a default signed integer type based on the fill value. 
That caused two metadata/type mismatches in the emitted CDF:

- `histogram_flag_array` is logically an 8-bit flag array, but padding
  with the fill value widened it away from `uint8`
- `number_of_bins` was written from a plain Python integer, so it did
  not reliably preserve the intended unsigned-16 representation

SPDF then flagged the resulting files because the serialized CDF types
and FILLVALs no longer matched the intended product schema.

Fix this in two parts:

1. Make the GLOWS L2 schema explicit
   - declare `histogram_flag_array` as `CDF_UINT1`
   - declare `number_of_bins` as `CDF_UINT2`
   - update `number_of_bins` to use the `65535` UINT2 fill sentinel

2. Preserve array dtypes when rebuilding padded bin arrays
   - write `number_of_bins` explicitly as `np.uint16`
   - when padding bin-dimensioned daily lightcurve arrays, start from
     the existing array dtype instead of letting `np.full` infer one
     from the fill value
   - if the configured fill value fits exactly in the current integer
     dtype, keep that dtype
   - only widen when the fill value cannot be represented without
     changing value

This keeps integer support arrays like `histogram_flag_array` in their
intended unsigned type while still allowing float arrays such as
`photon_flux` and `ecliptic_lon` to remain floating-point.

Add regression coverage for both the in-memory and written-CDF paths: - verify `create_l2_dataset()` keeps `photon_flux` and `ecliptic_lon` as floating arrays after padding - verify the emitted CDF writes - `histogram_flag_array` as `CDF_UINT1` with `FILLVAL=255` - `number_of_bins` as `CDF_UINT2` with `FILLVAL=65535` - `photon_flux` and `ecliptic_lon` as floating-point variables with the expected real fill value --- .../config/imap_glows_l2_variable_attrs.yaml | 4 +- imap_processing/glows/l2/glows_l2.py | 20 +++++- imap_processing/tests/glows/test_glows_l2.py | 67 +++++++++++++++++-- 3 files changed, 83 insertions(+), 8 deletions(-) diff --git a/imap_processing/cdf/config/imap_glows_l2_variable_attrs.yaml b/imap_processing/cdf/config/imap_glows_l2_variable_attrs.yaml index 209b93e09..564895922 100644 --- a/imap_processing/cdf/config/imap_glows_l2_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_glows_l2_variable_attrs.yaml @@ -483,6 +483,7 @@ flux_uncertainties: histogram_flag_array: <<: *lightcurve_defaults CATDESC: Bad-angle flags for histogram bins + CDF_DATA_TYPE: CDF_UINT1 DICT_KEY: SPASE>Support>SupportQuantity:DataQuality FIELDNAM: Bad-angle flags for histogram FILLVAL: 255 @@ -522,9 +523,10 @@ ecliptic_lat: number_of_bins: <<: *support_data_defaults CATDESC: Number of bins in histogram + CDF_DATA_TYPE: CDF_UINT2 DICT_KEY: SPASE>Support>SupportQuantity:Other FIELDNAM: Number of bins in histogram - FILLVAL: -9223372036854775808 + FILLVAL: *max_uint16 FORMAT: I4 LABLAXIS: No. of bins UNITS: ' ' diff --git a/imap_processing/glows/l2/glows_l2.py b/imap_processing/glows/l2/glows_l2.py index e6f08e0e9..1374f998f 100644 --- a/imap_processing/glows/l2/glows_l2.py +++ b/imap_processing/glows/l2/glows_l2.py @@ -210,7 +210,7 @@ def create_l2_dataset( if key == "number_of_bins": # number_of_bins does not have a bins dimension. 
output[key] = xr.DataArray( - np.array([value]), + np.array([value], dtype=np.uint16), dims=["epoch"], attrs=attrs.get_variable_attributes(key), ) @@ -220,8 +220,22 @@ def create_l2_dataset( # here, filling unused bins with the variable's CDF FILLVAL. var_attrs = attrs.get_variable_attributes(key) fillval = var_attrs["FILLVAL"] - padded = np.full(GlowsConstants.STANDARD_BIN_COUNT, fillval) - padded[:n_bins] = value + value_array = np.asarray(value) + padded_dtype = value_array.dtype + if np.issubdtype(padded_dtype, np.integer): + try: + cast_fillval = np.array(fillval, dtype=padded_dtype).item() + except (OverflowError, TypeError, ValueError): + padded_dtype = np.result_type(padded_dtype, np.asarray(fillval).dtype) + else: + if cast_fillval != fillval: + padded_dtype = np.result_type( + padded_dtype, np.asarray(fillval).dtype + ) + padded = np.full( + GlowsConstants.STANDARD_BIN_COUNT, fillval, dtype=padded_dtype + ) + padded[:n_bins] = value_array output[key] = xr.DataArray( np.array([padded]), dims=["epoch", "bins"], diff --git a/imap_processing/tests/glows/test_glows_l2.py b/imap_processing/tests/glows/test_glows_l2.py index b4e40de25..29ecff9ad 100644 --- a/imap_processing/tests/glows/test_glows_l2.py +++ b/imap_processing/tests/glows/test_glows_l2.py @@ -1,10 +1,12 @@ from unittest.mock import patch +import cdflib import numpy as np import pytest import xarray as xr from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes +from imap_processing.cdf.utils import write_cdf from imap_processing.glows.l1b.glows_l1b import glows_l1b from imap_processing.glows.l1b.glows_l1b_data import ( HistogramL1B, @@ -169,10 +171,10 @@ def test_generate_l2( cdf_attrs = ImapCdfAttributes() cdf_attrs.add_instrument_global_attrs("glows") cdf_attrs.add_instrument_variable_attrs("glows", "l2") - assert ( - create_l2_dataset(l2, cdf_attrs, l1b_hist_dataset.attrs)["epoch"].data[0] - == (l2.start_time + l2.end_time) / 2 - ) + output = create_l2_dataset(l2, cdf_attrs, 
l1b_hist_dataset.attrs) + assert output["epoch"].data[0] == (l2.start_time + l2.end_time) / 2 + for var in ["photon_flux", "ecliptic_lon"]: + assert np.issubdtype(output[var].dtype, np.floating) # Test case 2: L1B dataset has no good times (all flags 0) l1b_hist_dataset["flags"].values = np.zeros(l1b_hist_dataset.flags.shape) @@ -182,6 +184,63 @@ def test_generate_l2( assert ds.bad_time_flag_occurrences.dtype == np.uint16 +@patch.object(HistogramL2, "compute_position_angle", return_value=42.0) +@patch.object( + HistogramL1B, + "flag_uv_and_excluded", + return_value=(np.zeros(3600, dtype=bool), np.zeros(3600, dtype=bool)), +) +@patch.object(HistogramL1B, "update_spice_parameters", autospec=True) +def test_glows_l2_cdf_fillvals( + mock_spice_function, + mock_flag_uv_and_excluded, + mock_compute_position_angle, + l1a_dataset, + mock_ancillary_exclusions, + mock_pipeline_settings, + mock_conversion_table_dict, + mock_ecliptic_bin_centers, + mock_calibration_dataset, +): + mock_spice_function.side_effect = mock_update_spice_parameters + + l1b_hist_dataset = glows_l1b( + l1a_dataset[0], + mock_ancillary_exclusions.excluded_regions, + mock_ancillary_exclusions.uv_sources, + mock_ancillary_exclusions.suspected_transients, + mock_ancillary_exclusions.exclusions_by_instr_team, + mock_pipeline_settings, + mock_conversion_table_dict, + ) + l1b_hist_dataset.attrs["Repointing"] = "repoint00047" + + l2_dataset = glows_l2( + l1b_hist_dataset, mock_pipeline_settings, mock_calibration_dataset + )[0] + l2_dataset.attrs["Data_version"] = "001" + cdf_file_path = write_cdf(l2_dataset) + cdf_file = cdflib.CDF(cdf_file_path) + + histogram_flag_info = cdf_file.varinq("histogram_flag_array") + histogram_flag_attrs = cdf_file.varattsget("histogram_flag_array") + number_of_bins_info = cdf_file.varinq("number_of_bins") + number_of_bins_attrs = cdf_file.varattsget("number_of_bins") + photon_flux_info = cdf_file.varinq("photon_flux") + photon_flux_attrs = cdf_file.varattsget("photon_flux") + 
ecliptic_lon_info = cdf_file.varinq("ecliptic_lon") + ecliptic_lon_attrs = cdf_file.varattsget("ecliptic_lon") + + assert histogram_flag_info.Data_Type_Description == "CDF_UINT1" + assert histogram_flag_attrs["FILLVAL"] == np.uint8(255) + assert number_of_bins_info.Data_Type_Description == "CDF_UINT2" + assert number_of_bins_attrs["FILLVAL"] == np.uint16(65535) + assert photon_flux_info.Data_Type_Description == "CDF_DOUBLE" + assert np.isclose(photon_flux_attrs["FILLVAL"], np.float64(-1.0e31)) + assert ecliptic_lon_info.Data_Type_Description == "CDF_DOUBLE" + assert np.isclose(ecliptic_lon_attrs["FILLVAL"], np.float64(-1.0e31)) + + def test_bin_exclusions(l1b_hists): # TODO test excluding bins as well From 0c4f750f064bdb82da2204a06de705577d18fd41 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 13 May 2026 15:02:05 -0400 Subject: [PATCH 08/14] fix(codice): normalize hi epoch_delta support vars in l1b and l2 CoDICE hi-omni and hi-sectored were still carrying epoch_delta_minus/epoch_delta_plus through L1B as integer-valued support variables even though the current schema intent for those fields is real-valued temporal support data. That mismatch then propagated into the serialized L2 products and conflicted with the float-style ISTP metadata needed for the written CDFs. Update the Hi L1B path to normalize epoch_delta_minus and epoch_delta_plus to float64 before CDF write-out for: - imap_codice_l1b_hi-omni - imap_codice_l1b_hi-sectored Also update the Hi L2 variable-attribute templates so the final products describe those variables consistently with the written type: - FILLVAL: real fill sentinel (-1.0e31) - FORMAT: floating-point format (F19.1) This keeps the fix split cleanly by responsibility: - L1B now emits the support variables with the intended dtype - L2 no longer advertises them with integer fill/format metadata Test coverage is updated in two layers: 1. 
L1B metadata regression - add a parameterized CDF-level test for hi-omni and hi-sectored - assert epoch_delta_minus and epoch_delta_plus are written as CDF_DOUBLE with FILLVAL -1.0e31 2. L2 regression and metadata verification - keep the existing validation-based L2 science regression tests - add a separate parameterized write-path test that generates fresh Hi L1B inputs, runs L2, writes the output CDF, and verifies the emitted epoch_delta variables are CDF_DOUBLE with the expected float fill value The generated-L1B L2 metadata test is intentional: the checked-in Hi L1B validation artifacts still predate this dtype fix, so they cannot by themselves verify the regenerated L1B -> L2 path. Science regression coverage stays anchored to the historical validation files, while the new metadata test proves the current pipeline output is correct. This change is limited to CoDICE Hi epoch_delta support-variable typing and metadata. It does not refresh the checked-in validation artifacts. --- ...imap_codice_l2-hi-omni_variable_attrs.yaml | 8 +- ..._codice_l2-hi-sectored_variable_attrs.yaml | 8 +- imap_processing/codice/codice_l1b.py | 15 ++++ .../tests/codice/test_codice_hi_l2.py | 88 ++++++++++++++++++- .../tests/codice/test_codice_l1b.py | 24 +++++ 5 files changed, 134 insertions(+), 9 deletions(-) diff --git a/imap_processing/cdf/config/imap_codice_l2-hi-omni_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l2-hi-omni_variable_attrs.yaml index e131e34d2..eb8075eae 100644 --- a/imap_processing/cdf/config/imap_codice_l2-hi-omni_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l2-hi-omni_variable_attrs.yaml @@ -26,8 +26,8 @@ epoch_delta_minus: CATDESC: Time from acquisition start to acquisition center DICT_KEY: SPASE>Support>SupportQuantity:Temporal,Qualifier:Uncertainty FIELDNAM: epoch delta minus - FILLVAL: *min_int - FORMAT: I19 + FILLVAL: *real_fillval + FORMAT: F19.1 LABLAXIS: Epoch Delta Minus SCALETYP: linear UNITS: ns @@ -39,8 +39,8 @@ 
epoch_delta_plus: CATDESC: Time from acquisition center to acquisition end DICT_KEY: SPASE>Support>SupportQuantity:Temporal,Qualifier:Uncertainty FIELDNAM: epoch delta plus - FILLVAL: *min_int - FORMAT: I19 + FILLVAL: *real_fillval + FORMAT: F19.1 LABLAXIS: Epoch Delta Plus SCALETYP: linear UNITS: ns diff --git a/imap_processing/cdf/config/imap_codice_l2-hi-sectored_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l2-hi-sectored_variable_attrs.yaml index e583851b7..15c6e553e 100644 --- a/imap_processing/cdf/config/imap_codice_l2-hi-sectored_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l2-hi-sectored_variable_attrs.yaml @@ -24,8 +24,8 @@ epoch_delta_minus: CATDESC: Time from acquisition start to acquisition center DICT_KEY: SPASE>Support>SupportQuantity:Temporal,Qualifier:Uncertainty FIELDNAM: epoch delta minus - FILLVAL: *min_int - FORMAT: I19 + FILLVAL: *real_fillval + FORMAT: F19.1 LABLAXIS: Epoch Delta Minus SCALETYP: linear UNITS: ns @@ -37,8 +37,8 @@ epoch_delta_plus: CATDESC: Time from acquisition center to acquisition end DICT_KEY: SPASE>Support>SupportQuantity:Temporal,Qualifier:Uncertainty FIELDNAM: epoch delta plus - FILLVAL: *min_int - FORMAT: I19 + FILLVAL: *real_fillval + FORMAT: F19.1 LABLAXIS: Epoch Delta Plus SCALETYP: linear UNITS: ns diff --git a/imap_processing/codice/codice_l1b.py b/imap_processing/codice/codice_l1b.py index e6805e5f7..44799cff3 100644 --- a/imap_processing/codice/codice_l1b.py +++ b/imap_processing/codice/codice_l1b.py @@ -22,6 +22,18 @@ logger = logging.getLogger(__name__) +def _cast_epoch_delta_vars_to_float64(dataset: xr.Dataset) -> xr.Dataset: + """Normalize epoch delta support vars to floating-point before CDF write-out.""" + for var in ["epoch_delta_plus", "epoch_delta_minus"]: + if var in dataset: + dataset[var] = xr.DataArray( + dataset[var].data.astype(np.float64), + dims=dataset[var].dims, + attrs=dataset[var].attrs, + ) + return dataset + + def convert_to_rates(dataset: xr.Dataset, 
descriptor: str) -> np.ndarray: """ Apply a conversion from counts to rates. @@ -187,6 +199,9 @@ def process_codice_l1b(file_path: Path) -> xr.Dataset: # Use the L1a data product as a starting point for L1b l1b_dataset = l1a_dataset.copy(deep=True) + if descriptor in ["hi-omni", "hi-sectored"]: + l1b_dataset = _cast_epoch_delta_vars_to_float64(l1b_dataset) + # Update the global attributes l1b_dataset.attrs = cdf_attrs.get_global_attributes(dataset_name) diff --git a/imap_processing/tests/codice/test_codice_hi_l2.py b/imap_processing/tests/codice/test_codice_hi_l2.py index af7f27716..1d2dee991 100644 --- a/imap_processing/tests/codice/test_codice_hi_l2.py +++ b/imap_processing/tests/codice/test_codice_hi_l2.py @@ -1,5 +1,6 @@ from unittest.mock import patch +import cdflib import numpy as np import pytest from imap_data_access.processing_input import ( @@ -10,6 +11,8 @@ from imap_processing import imap_module_directory from imap_processing.cdf.utils import load_cdf, write_cdf +from imap_processing.codice.codice_l1a import process_l1a +from imap_processing.codice.codice_l1b import process_codice_l1b from imap_processing.codice.codice_l2 import ( process_codice_l2, ) @@ -21,6 +24,56 @@ pytestmark = pytest.mark.external_test_data +def _generate_hi_l1b_file(descriptor: str, codice_lut_path): + """Generate a fresh Hi L1B CDF for metadata regression tests. + + We need this helper because the checked-in ``tests/codice/data/l1b_validation`` + artifacts predate the epoch-delta dtype fix and still serialize + ``epoch_delta_plus`` / ``epoch_delta_minus`` as integer CDF variables. + The L2 metadata tests below are specifically trying to verify the current + regenerated pipeline behavior, so they must consume an L1B file produced by + the current L1A -> L1B code path instead of the historical validation + artifact. 
+ + If we later refresh the Hi L1B validation CDFs to include this dtype fix, + this helper can be removed and the L2 metadata tests can go back to using + the checked-in ``l1b_validation`` files directly. + """ + + def _lookup_l1_inputs(request_descriptor=None, data_type=None, **kwargs): + # ``process_l1a()`` asks for two different inputs through the same file + # lookup hook: + # 1. the raw science packet via ``data_type='l0'`` with no descriptor + # 2. the science LUT via ``descriptor='l1a-sci-lut'`` + # + # Patch the lookup so the real production code can run unchanged while + # the test routes those requests to the correct local test artifacts. + request_descriptor = kwargs.get("descriptor", request_descriptor) + if request_descriptor is None and data_type == "l0": + return codice_lut_path(descriptor, data_type="l0") + return codice_lut_path(request_descriptor, data_type) + + with patch( + "imap_data_access.processing_input.ProcessingInputCollection.get_file_paths" + ) as mock_get_file_paths: + mock_get_file_paths.side_effect = _lookup_l1_inputs + processed_l1a_file = write_cdf(process_l1a(ProcessingInputCollection())[0]) + processed_l1b = process_codice_l1b(processed_l1a_file) + processed_l1b.attrs["Data_version"] = "001" + return write_cdf(processed_l1b) + + +def _mock_l2_file_paths(descriptor: str, l1b_file, codice_lut_path): + """Return a side effect that points L2 processing at a generated L1B file.""" + def _side_effect(request_descriptor=None, data_type=None, **kwargs): + request_descriptor = kwargs.get("descriptor", request_descriptor) + if request_descriptor == descriptor: + return [l1b_file] + return codice_lut_path(request_descriptor, data_type) + + return _side_effect + + @pytest.fixture def mock_get_file_paths(codice_lut_path): with patch( @@ -70,7 +123,7 @@ def test_l2_hi_omni(mock_get_file_paths): # Tests that dimensions match assert processed_l2[variable].dims == val_data[variable].dims, ( f"Dimension mismatch in coordinate '{variable}'" - ) + 
) processed_l2.attrs["Data_version"] = "001" omni_cdf_file = write_cdf(processed_l2) @@ -161,3 +214,36 @@ def test_l2_hi_sectored(mock_get_file_paths): sectored_cdf_file.name == f"imap_codice_l2_hi-sectored_{VALIDATION_FILE_DATE}_v001.cdf" ) + + +@pytest.mark.parametrize( + ("descriptor", "efficiency_file"), + [ + ("hi-omni", "imap_codice_l2-hi-omni-efficiency_20251212_v003.csv"), + ("hi-sectored", "imap_codice_l2-hi-sectored-efficiency_20251008_v001.csv"), + ], +) +def test_l2_hi_epoch_delta_cdf_metadata( + descriptor, efficiency_file, codice_lut_path +): + l1b_file = _generate_hi_l1b_file(descriptor, codice_lut_path) + dependencies = ProcessingInputCollection( + AncillaryInput(efficiency_file), + ScienceInput(l1b_file.name), + ) + + with patch( + "imap_data_access.processing_input.ProcessingInputCollection.get_file_paths" + ) as mock_get_file_paths: + mock_get_file_paths.side_effect = _mock_l2_file_paths( + descriptor, l1b_file, codice_lut_path + ) + processed_l2 = process_codice_l2(descriptor, dependencies) + + processed_l2.attrs["Data_version"] = "001" + cdf_file = cdflib.CDF(write_cdf(processed_l2)) + for var in ["epoch_delta_minus", "epoch_delta_plus"]: + var_info = cdf_file.varinq(var) + var_attrs = cdf_file.varattsget(var) + assert var_info.Data_Type_Description == "CDF_DOUBLE" + assert np.isclose(var_attrs["FILLVAL"], np.float64(-1.0e31)) diff --git a/imap_processing/tests/codice/test_codice_l1b.py b/imap_processing/tests/codice/test_codice_l1b.py index 77aa35d0c..96d418ac3 100644 --- a/imap_processing/tests/codice/test_codice_l1b.py +++ b/imap_processing/tests/codice/test_codice_l1b.py @@ -2,6 +2,7 @@ from unittest.mock import patch +import cdflib import numpy as np import pytest from imap_data_access import ProcessingInputCollection @@ -309,6 +310,29 @@ def test_l1b_hi_omni(mock_get_file_paths, codice_lut_path): assert cdf_file.name == f"imap_codice_l1b_hi-omni_{VALIDATION_FILE_DATE}_v999.cdf" +@pytest.mark.parametrize("descriptor", ["hi-omni", 
"hi-sectored"]) +def test_l1b_hi_epoch_delta_cdf_metadata(descriptor, codice_lut_path): + with patch( + "imap_data_access.processing_input.ProcessingInputCollection.get_file_paths" + ) as mock_get_file_paths: + mock_get_file_paths.side_effect = [ + codice_lut_path(descriptor=descriptor, data_type="l0"), + codice_lut_path(descriptor="l1a-sci-lut"), + ] + + l1a_file_path = write_cdf(process_l1a(dependency=ProcessingInputCollection())[0]) + processed_data = process_codice_l1b(file_path=l1a_file_path) + cdf_file_path = write_cdf(processed_data) + + cdf_file = cdflib.CDF(cdf_file_path) + + for var in ["epoch_delta_minus", "epoch_delta_plus"]: + var_info = cdf_file.varinq(var) + var_attrs = cdf_file.varattsget(var) + assert var_info.Data_Type_Description == "CDF_DOUBLE" + assert np.isclose(var_attrs["FILLVAL"], np.float64(-1.0e31)) + + @pytest.mark.xfail(reason="Need to revisit in future PR") @patch("imap_data_access.processing_input.ProcessingInputCollection.get_file_paths") def test_l1b_hi_sectored(mock_get_file_paths, codice_lut_path): From 8b01919c99554f527feafa1c1d5762fcb7da8ea4 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 13 May 2026 19:05:17 -0400 Subject: [PATCH 09/14] chore: satisfy pre-commit checks for fillval updates Clean up the files touched by the ISTP fill-value work so they pass the current pre-commit hooks without changing the intended product behavior. - simplify the GLOWS L2 refactor to keep only the shared daily-lightcurve bin padding helper extracted - add complete numpydoc sections for the new CoDICE L1B helper - shorten test setup in the CoDICE L1B metadata regression to satisfy line-length checks This is a style and maintainability cleanup only. It keeps the underlying fill-value and dtype fixes intact while making the updated files pass ruff, numpydoc-validation, and mypy. 
--- imap_processing/codice/codice_l1b.py | 14 ++++- imap_processing/codice/codice_l2.py | 4 +- imap_processing/glows/l2/glows_l2.py | 57 ++++++++++++------- .../tests/codice/test_codice_hi_l2.py | 7 +-- .../tests/codice/test_codice_l1b.py | 3 +- imap_processing/tests/mag/test_mag_l2.py | 2 +- 6 files changed, 58 insertions(+), 29 deletions(-) diff --git a/imap_processing/codice/codice_l1b.py b/imap_processing/codice/codice_l1b.py index 44799cff3..de8557917 100644 --- a/imap_processing/codice/codice_l1b.py +++ b/imap_processing/codice/codice_l1b.py @@ -23,7 +23,19 @@ def _cast_epoch_delta_vars_to_float64(dataset: xr.Dataset) -> xr.Dataset: - """Normalize epoch delta support vars to floating-point before CDF write-out.""" + """ + Normalize epoch delta support vars to floating-point before CDF write-out. + + Parameters + ---------- + dataset : xarray.Dataset + Dataset whose epoch delta support variables should be normalized. + + Returns + ------- + xarray.Dataset + Dataset with epoch delta support variables stored as ``float64``. 
+ """ for var in ["epoch_delta_plus", "epoch_delta_minus"]: if var in dataset: dataset[var] = xr.DataArray( diff --git a/imap_processing/codice/codice_l2.py b/imap_processing/codice/codice_l2.py index 3e7e2ee6c..b82458c9c 100644 --- a/imap_processing/codice/codice_l2.py +++ b/imap_processing/codice/codice_l2.py @@ -575,9 +575,7 @@ def process_lo_species_intensity( restored_values = np.nan_to_num( restored_values, nan=fillval, posinf=fillval, neginf=fillval ) - restored_values = np.clip(np.rint(restored_values), 0, 255).astype( - np.uint8 - ) + restored_values = np.clip(np.rint(restored_values), 0, 255).astype(np.uint8) dataset[var] = xr.DataArray( restored_values, dims=dataset[var].dims, diff --git a/imap_processing/glows/l2/glows_l2.py b/imap_processing/glows/l2/glows_l2.py index 1374f998f..ef5927e2c 100644 --- a/imap_processing/glows/l2/glows_l2.py +++ b/imap_processing/glows/l2/glows_l2.py @@ -23,6 +23,40 @@ logger = logging.getLogger(__name__) +def _pad_daily_lightcurve_bins(value: object, fillval: object) -> np.ndarray: + """ + Pad chopped daily-lightcurve bin data back to the standard bin count. + + Parameters + ---------- + value : object + Chopped daily-lightcurve bin data. + fillval : object + CDF fill value used for padded bins. + + Returns + ------- + numpy.ndarray + Bin data padded to the standard bin count. 
+ """ + value_array = np.asarray(value) + padded_dtype = value_array.dtype + fillval_dtype = np.asarray(fillval).dtype + + if np.issubdtype(padded_dtype, np.integer): + try: + cast_fillval = np.array(fillval, dtype=padded_dtype).item() + except (OverflowError, TypeError, ValueError): + padded_dtype = np.result_type(padded_dtype, fillval_dtype) + else: + if cast_fillval != fillval: + padded_dtype = np.result_type(padded_dtype, fillval_dtype) + + padded = np.full(GlowsConstants.STANDARD_BIN_COUNT, fillval, dtype=padded_dtype) + padded[: len(value_array)] = value_array + return padded + + def glows_l2( input_dataset: xr.Dataset, pipeline_settings_dataset: xr.Dataset, @@ -193,7 +227,9 @@ def create_l2_dataset( # Convert time to UTC utc_string = [met_to_utc(ttj2000ns_to_met(value))] output[key] = xr.DataArray( - utc_string, dims=["epoch"], attrs=attrs.get_variable_attributes(key) + utc_string, + dims=["epoch"], + attrs=attrs.get_variable_attributes(key), ) elif key != "daily_lightcurve": val = value @@ -205,7 +241,6 @@ def create_l2_dataset( attrs=attrs.get_variable_attributes(key), ) - n_bins = histogram_l2.daily_lightcurve.number_of_bins for key, value in dataclasses.asdict(histogram_l2.daily_lightcurve).items(): if key == "number_of_bins": # number_of_bins does not have a bins dimension. @@ -219,23 +254,7 @@ def create_l2_dataset( # avoid operating on FILLVAL data. Re-expand to STANDARD_BIN_COUNT # here, filling unused bins with the variable's CDF FILLVAL. 
var_attrs = attrs.get_variable_attributes(key) - fillval = var_attrs["FILLVAL"] - value_array = np.asarray(value) - padded_dtype = value_array.dtype - if np.issubdtype(padded_dtype, np.integer): - try: - cast_fillval = np.array(fillval, dtype=padded_dtype).item() - except (OverflowError, TypeError, ValueError): - padded_dtype = np.result_type(padded_dtype, np.asarray(fillval).dtype) - else: - if cast_fillval != fillval: - padded_dtype = np.result_type( - padded_dtype, np.asarray(fillval).dtype - ) - padded = np.full( - GlowsConstants.STANDARD_BIN_COUNT, fillval, dtype=padded_dtype - ) - padded[:n_bins] = value_array + padded = _pad_daily_lightcurve_bins(value, var_attrs["FILLVAL"]) output[key] = xr.DataArray( np.array([padded]), dims=["epoch", "bins"], diff --git a/imap_processing/tests/codice/test_codice_hi_l2.py b/imap_processing/tests/codice/test_codice_hi_l2.py index 1d2dee991..54864cca2 100644 --- a/imap_processing/tests/codice/test_codice_hi_l2.py +++ b/imap_processing/tests/codice/test_codice_hi_l2.py @@ -65,6 +65,7 @@ def _lookup_l1_inputs(request_descriptor=None, data_type=None, **kwargs): def _mock_l2_file_paths(descriptor: str, l1b_file, codice_lut_path): """Return a side effect that points L2 processing at a generated L1B file.""" + def _side_effect(request_descriptor=None, data_type=None, **kwargs): request_descriptor = kwargs.get("descriptor", request_descriptor) if request_descriptor == descriptor: @@ -123,7 +124,7 @@ def test_l2_hi_omni(mock_get_file_paths): # Tests that dimensions match assert processed_l2[variable].dims == val_data[variable].dims, ( f"Dimension mismatch in coordinate '{variable}'" - ) + ) processed_l2.attrs["Data_version"] = "001" omni_cdf_file = write_cdf(processed_l2) @@ -223,9 +224,7 @@ def test_l2_hi_sectored(mock_get_file_paths): ("hi-sectored", "imap_codice_l2-hi-sectored-efficiency_20251008_v001.csv"), ], ) -def test_l2_hi_epoch_delta_cdf_metadata( - descriptor, efficiency_file, codice_lut_path -): +def 
test_l2_hi_epoch_delta_cdf_metadata(descriptor, efficiency_file, codice_lut_path): l1b_file = _generate_hi_l1b_file(descriptor, codice_lut_path) dependencies = ProcessingInputCollection( AncillaryInput(efficiency_file), diff --git a/imap_processing/tests/codice/test_codice_l1b.py b/imap_processing/tests/codice/test_codice_l1b.py index 96d418ac3..8c8086714 100644 --- a/imap_processing/tests/codice/test_codice_l1b.py +++ b/imap_processing/tests/codice/test_codice_l1b.py @@ -320,7 +320,8 @@ def test_l1b_hi_epoch_delta_cdf_metadata(descriptor, codice_lut_path): codice_lut_path(descriptor="l1a-sci-lut"), ] - l1a_file_path = write_cdf(process_l1a(dependency=ProcessingInputCollection())[0]) + l1a_dataset = process_l1a(dependency=ProcessingInputCollection())[0] + l1a_file_path = write_cdf(l1a_dataset) processed_data = process_codice_l1b(file_path=l1a_file_path) cdf_file_path = write_cdf(processed_data) diff --git a/imap_processing/tests/mag/test_mag_l2.py b/imap_processing/tests/mag/test_mag_l2.py index 51aa43280..7cc18574b 100644 --- a/imap_processing/tests/mag/test_mag_l2.py +++ b/imap_processing/tests/mag/test_mag_l2.py @@ -5,8 +5,8 @@ import pytest import xarray as xr -from imap_processing.cdf.utils import write_cdf from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes +from imap_processing.cdf.utils import write_cdf from imap_processing.mag.constants import FILLVAL, DataMode from imap_processing.mag.l2.mag_l2 import mag_l2, retrieve_matrix_from_l2_calibration from imap_processing.mag.l2.mag_l2_data import MagL2, ValidFrames From 2ace40d590fae02dc44cf6a08b1a07f45af3b1a4 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 13 May 2026 19:20:00 -0400 Subject: [PATCH 10/14] Revert "Update .gitignore to include imap_processing/_version.py for local builds" This reverts commit c2d1cc7c07510b07a7cf297b91c390274a51f86d. 
--- .gitignore | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.gitignore b/.gitignore index 78f24861b..bdc9cfa1d 100644 --- a/.gitignore +++ b/.gitignore @@ -37,11 +37,6 @@ share/python-wheels/ MANIFEST .python-version -# Generated by poetry-dynamic-versioning during local installs/builds. -# Keep it untracked so environment-specific version substitutions do not dirty -# the worktree on every setup or test run. -imap_processing/_version.py - # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. From 5cb912424bd817b72f318ddf415a92fbc3d38d77 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 14 May 2026 17:00:54 -0400 Subject: [PATCH 11/14] Keep CoDICE LO direct-event spin_sector as integer metadata Preserve spin_sector as an unsigned integer index in the LO direct-event L2 output by replacing invalid sectors with the uint8 fill sentinel instead of NaN. This prevents NumPy from widening the index variable to float and keeps the written CDF type/FILLVAL aligned with the variable semantics. Update the regression assertion to verify spin_sector is written as CDF_UINT1 with FILLVAL 255. 
--- ...ce_l2-lo-direct-events_variable_attrs.yaml | 4 +-- imap_processing/codice/codice_l2.py | 15 +++++++--- .../tests/codice/test_codice_l2.py | 28 +++++++++++++++++-- 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/imap_processing/cdf/config/imap_codice_l2-lo-direct-events_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l2-lo-direct-events_variable_attrs.yaml index 5db436769..80774344a 100644 --- a/imap_processing/cdf/config/imap_codice_l2-lo-direct-events_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l2-lo-direct-events_variable_attrs.yaml @@ -260,8 +260,8 @@ spin_sector: DEPEND_2: event_num DICT_KEY: SPASE>Support>SupportQuantity:Positional FIELDNAM: Spin Sector Index - FILLVAL: *real_fillval - FORMAT: F8.1 + FILLVAL: *uint8_fillval + FORMAT: I3 LABLAXIS: Spin Sector LABL_PTR_1: priority_label LABL_PTR_2: event_num_label diff --git a/imap_processing/codice/codice_l2.py b/imap_processing/codice/codice_l2.py index b82458c9c..733c4da03 100644 --- a/imap_processing/codice/codice_l2.py +++ b/imap_processing/codice/codice_l2.py @@ -1262,6 +1262,10 @@ def process_lo_direct_events(dependencies: ProcessingInputCollection) -> xr.Data l2_dataset["position"].dims, elevation_angle.astype(np.float32), ) + spin_sector_attrs = cdf_attrs.get_variable_attributes( + "spin_sector", check_schema=False + ) + spin_sector_fillval = np.uint8(spin_sector_attrs["FILLVAL"]) # Convert spin_sector to spin_angle in degrees # Use equation from section 11.2.2 of algorithm document # Shift all spin sectors for all positions 13 - 24 adding 12 and mod 24 @@ -1273,13 +1277,16 @@ def process_lo_direct_events(dependencies: ProcessingInputCollection) -> xr.Data ) l2_dataset["spin_angle"] = l2_dataset["spin_sector"].astype(np.float32) * 15.0 + 7.5 - # Set spin angle and sector to NaN for invalid positions (>23) + # Preserve spin_sector as an integer index while marking invalid sectors. 
+ invalid_spin_sector = ~np.isfinite(original_spin_sector) | ( + original_spin_sector > 23 + ) l2_dataset["spin_angle"] = xr.where( - (original_spin_sector > 23), np.nan, l2_dataset["spin_angle"] + invalid_spin_sector, np.nan, l2_dataset["spin_angle"] ) l2_dataset["spin_sector"] = xr.where( - (original_spin_sector > 23), np.nan, l2_dataset["spin_sector"] - ) + invalid_spin_sector, spin_sector_fillval, l2_dataset["spin_sector"] + ).astype(np.uint8) # convert apd energy to physical units # Set the gain labels based on gain values gains = l2_dataset["gain"].values.ravel() diff --git a/imap_processing/tests/codice/test_codice_l2.py b/imap_processing/tests/codice/test_codice_l2.py index 4e7a6c7e4..3563f3a87 100644 --- a/imap_processing/tests/codice/test_codice_l2.py +++ b/imap_processing/tests/codice/test_codice_l2.py @@ -634,6 +634,30 @@ def test_codice_l2_lo_de(mock_get_file_paths, codice_lut_path): ] processed_l2_ds = process_codice_l2("lo-direct-events", ProcessingInputCollection()) + l1a_input_ds = load_cdf(processed_l1a_file) + original_spin_sector = l1a_input_ds["spin_sector"].values + # Mirror the LO direct-event spin-sector remapping so this test catches any + # unintended changes to valid sector values while still checking that only + # invalid sectors are replaced with the uint8 fill value. 
+ expected_spin_sector = np.where( + (l1a_input_ds["position"].values >= 13) + & (l1a_input_ds["position"].values <= 24), + (original_spin_sector + 12) % 24, + original_spin_sector, + ) + invalid_spin_sector = ~np.isfinite(original_spin_sector) | ( + original_spin_sector > 23 + ) + expected_spin_sector = np.where( + invalid_spin_sector, np.uint8(255), expected_spin_sector + ).astype(np.uint8) + assert processed_l2_ds["spin_sector"].dtype == np.uint8 + np.testing.assert_array_equal( + processed_l2_ds["spin_sector"].values, + expected_spin_sector, + err_msg="LO direct-event spin_sector values changed unexpectedly", + ) + l2_val_data = ( imap_module_directory / "tests" @@ -678,8 +702,8 @@ def test_codice_l2_lo_de(mock_get_file_paths, codice_lut_path): spin_sector_attrs = cdf_file.varattsget("spin_sector") data_quality_info = cdf_file.varinq("data_quality") data_quality_attrs = cdf_file.varattsget("data_quality") - assert spin_sector_info.Data_Type_Description == "CDF_DOUBLE" - assert np.isclose(spin_sector_attrs["FILLVAL"], np.float64(-1.0e31)) + assert spin_sector_info.Data_Type_Description == "CDF_UINT1" + assert spin_sector_attrs["FILLVAL"] == np.uint8(255) assert data_quality_info.Data_Type_Description == "CDF_UINT2" assert data_quality_attrs["FILLVAL"] == np.uint16(65535) load_cdf(file) From 0570a7f25c2a1fd19facf1bf02ef2525ee2b4db2 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 14 May 2026 17:03:40 -0400 Subject: [PATCH 12/14] Align SWAPI esa_energy bounds with float metadata Update esa_energy VALIDMIN/VALIDMAX to floating-point values so the metadata is consistent with the CDF_DOUBLE variable type, real-valued FILLVAL, and float FORMAT. This preserves the existing 65535 upper bound for now. That value is probably a legacy/raw-storage bound rather than the most scientifically meaningful maximum for esa_energy, so a future update should use an instrument-approved value from the SWAPI scientists. 
--- imap_processing/cdf/config/imap_swapi_variable_attrs.yaml | 4 ++-- imap_processing/tests/swapi/test_swapi_l2.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml b/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml index 571947887..66b20efd9 100644 --- a/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml @@ -113,8 +113,8 @@ esa_energy: LABL_PTR_1: esa_step_label SCALETYP: linear UNITS: eV / q - VALIDMAX: 65535 - VALIDMIN: 0 + VALIDMAX: 65535.0 + VALIDMIN: 0.0 VAR_TYPE: support_data metadata_default: &metadata_default diff --git a/imap_processing/tests/swapi/test_swapi_l2.py b/imap_processing/tests/swapi/test_swapi_l2.py index a78f3bedc..ce94dd777 100644 --- a/imap_processing/tests/swapi/test_swapi_l2.py +++ b/imap_processing/tests/swapi/test_swapi_l2.py @@ -131,6 +131,8 @@ def second_get_file_paths_side_effect(descriptor): esa_energy_attrs = cdf_file.varattsget("esa_energy") assert esa_energy_info.Data_Type_Description == "CDF_DOUBLE" assert np.isclose(esa_energy_attrs["FILLVAL"], np.float64(-1.0e31)) + assert esa_energy_attrs["VALIDMAX"] == np.float64(65535.0) + assert esa_energy_attrs["VALIDMIN"] == np.float64(0.0) # Test uncertainty variables are as expected np.testing.assert_array_equal( From d93f105c158ff1eb985aeae6165ac6237f350e65 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 15 May 2026 11:54:15 -0400 Subject: [PATCH 13/14] revert(codice): drop hi epoch_delta float-casting from fillval branch Remove the CoDICE Hi-only `epoch_delta_minus` / `epoch_delta_plus` float-casting changes from the ISTP fill-value branch. 
This branch had picked up an additional CoDICE-specific workaround that: - cast `epoch_delta_*` to `float64` in the Hi L1B path - changed the Hi omni and Hi sectored L2 attr templates to float-style `FILLVAL` / `FORMAT` - added float-specific regression tests around that behavior That work is no longer intended to ship as part of the fill-value PR. The final CoDICE `epoch_delta_*` datatype decision will be handled in a separate follow-up PR. Revert the branch-local workaround by: - removing `_cast_epoch_delta_vars_to_float64()` from `codice_l1b.py` - removing the `hi-omni` / `hi-sectored` call site in `process_codice_l1b()` - restoring the Hi omni and Hi sectored `epoch_delta_*` L2 metadata to the current `upstream/dev` state - deleting the float-specific CoDICE epoch-delta regression tests from `test_codice_l1b.py` and `test_codice_hi_l2.py` No new epoch-delta behavior is introduced here. This change only removes the extra branch complexity so `fix/istp_fillval` goes back to its intended scope. 
Validated locally with: - `pytest -q imap_processing/tests/codice/test_codice_l1b.py` - `pytest -q imap_processing/tests/codice/test_codice_hi_l2.py` - `pytest -q imap_processing/tests/codice/test_codice_l2.py -k 'lo_de or hi_de or sw_species'` - `pre-commit` on the touched CoDICE files --- ...imap_codice_l2-hi-omni_variable_attrs.yaml | 8 +- ..._codice_l2-hi-sectored_variable_attrs.yaml | 8 +- imap_processing/codice/codice_l1b.py | 27 ------ .../tests/codice/test_codice_hi_l2.py | 85 ------------------- .../tests/codice/test_codice_l1b.py | 25 ------ 5 files changed, 8 insertions(+), 145 deletions(-) diff --git a/imap_processing/cdf/config/imap_codice_l2-hi-omni_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l2-hi-omni_variable_attrs.yaml index eb8075eae..e131e34d2 100644 --- a/imap_processing/cdf/config/imap_codice_l2-hi-omni_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l2-hi-omni_variable_attrs.yaml @@ -26,8 +26,8 @@ epoch_delta_minus: CATDESC: Time from acquisition start to acquisition center DICT_KEY: SPASE>Support>SupportQuantity:Temporal,Qualifier:Uncertainty FIELDNAM: epoch delta minus - FILLVAL: *real_fillval - FORMAT: F19.1 + FILLVAL: *min_int + FORMAT: I19 LABLAXIS: Epoch Delta Minus SCALETYP: linear UNITS: ns @@ -39,8 +39,8 @@ epoch_delta_plus: CATDESC: Time from acquisition center to acquisition end DICT_KEY: SPASE>Support>SupportQuantity:Temporal,Qualifier:Uncertainty FIELDNAM: epoch delta plus - FILLVAL: *real_fillval - FORMAT: F19.1 + FILLVAL: *min_int + FORMAT: I19 LABLAXIS: Epoch Delta Plus SCALETYP: linear UNITS: ns diff --git a/imap_processing/cdf/config/imap_codice_l2-hi-sectored_variable_attrs.yaml b/imap_processing/cdf/config/imap_codice_l2-hi-sectored_variable_attrs.yaml index 15c6e553e..e583851b7 100644 --- a/imap_processing/cdf/config/imap_codice_l2-hi-sectored_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_codice_l2-hi-sectored_variable_attrs.yaml @@ -24,8 +24,8 @@ epoch_delta_minus: CATDESC: 
Time from acquisition start to acquisition center DICT_KEY: SPASE>Support>SupportQuantity:Temporal,Qualifier:Uncertainty FIELDNAM: epoch delta minus - FILLVAL: *real_fillval - FORMAT: F19.1 + FILLVAL: *min_int + FORMAT: I19 LABLAXIS: Epoch Delta Minus SCALETYP: linear UNITS: ns @@ -37,8 +37,8 @@ epoch_delta_plus: CATDESC: Time from acquisition center to acquisition end DICT_KEY: SPASE>Support>SupportQuantity:Temporal,Qualifier:Uncertainty FIELDNAM: epoch delta plus - FILLVAL: *real_fillval - FORMAT: F19.1 + FILLVAL: *min_int + FORMAT: I19 LABLAXIS: Epoch Delta Plus SCALETYP: linear UNITS: ns diff --git a/imap_processing/codice/codice_l1b.py b/imap_processing/codice/codice_l1b.py index e7be10f8c..644f59a3b 100644 --- a/imap_processing/codice/codice_l1b.py +++ b/imap_processing/codice/codice_l1b.py @@ -22,30 +22,6 @@ logger = logging.getLogger(__name__) -def _cast_epoch_delta_vars_to_float64(dataset: xr.Dataset) -> xr.Dataset: - """ - Normalize epoch delta support vars to floating-point before CDF write-out. - - Parameters - ---------- - dataset : xarray.Dataset - Dataset whose epoch delta support variables should be normalized. - - Returns - ------- - xarray.Dataset - Dataset with epoch delta support variables stored as ``float64``. - """ - for var in ["epoch_delta_plus", "epoch_delta_minus"]: - if var in dataset: - dataset[var] = xr.DataArray( - dataset[var].data.astype(np.float64), - dims=dataset[var].dims, - attrs=dataset[var].attrs, - ) - return dataset - - def convert_to_rates(dataset: xr.Dataset, descriptor: str) -> np.ndarray: """ Apply a conversion from counts to rates. 
@@ -208,9 +184,6 @@ def process_codice_l1b(file_path: Path) -> xr.Dataset: # Use the L1a data product as a starting point for L1b l1b_dataset = l1a_dataset.copy(deep=True) - if descriptor in ["hi-omni", "hi-sectored"]: - l1b_dataset = _cast_epoch_delta_vars_to_float64(l1b_dataset) - # Update the global attributes l1b_dataset.attrs = cdf_attrs.get_global_attributes(dataset_name) diff --git a/imap_processing/tests/codice/test_codice_hi_l2.py b/imap_processing/tests/codice/test_codice_hi_l2.py index 54864cca2..af7f27716 100644 --- a/imap_processing/tests/codice/test_codice_hi_l2.py +++ b/imap_processing/tests/codice/test_codice_hi_l2.py @@ -1,6 +1,5 @@ from unittest.mock import patch -import cdflib import numpy as np import pytest from imap_data_access.processing_input import ( @@ -11,8 +10,6 @@ from imap_processing import imap_module_directory from imap_processing.cdf.utils import load_cdf, write_cdf -from imap_processing.codice.codice_l1a import process_l1a -from imap_processing.codice.codice_l1b import process_codice_l1b from imap_processing.codice.codice_l2 import ( process_codice_l2, ) @@ -24,57 +21,6 @@ pytestmark = pytest.mark.external_test_data -def _generate_hi_l1b_file(descriptor: str, codice_lut_path): - """Generate a fresh Hi L1B CDF for metadata regression tests. - - We need this helper because the checked-in ``tests/codice/data/l1b_validation`` - artifacts predate the epoch-delta dtype fix and still serialize - ``epoch_delta_plus`` / ``epoch_delta_minus`` as integer CDF variables. - The L2 metadata tests below are specifically trying to verify the current - regenerated pipeline behavior, so they must consume an L1B file produced by - the current L1A -> L1B code path instead of the historical validation - artifact. - - If we later refresh the Hi L1B validation CDFs to include this dtype fix, - this helper can be removed and the L2 metadata tests can go back to using - the checked-in ``l1b_validation`` files directly. 
- """ - - def _lookup_l1_inputs(request_descriptor=None, data_type=None, **kwargs): - # ``process_l1a()`` asks for two different inputs through the same file - # lookup hook: - # 1. the raw science packet via ``data_type='l0'`` with no descriptor - # 2. the science LUT via ``descriptor='l1a-sci-lut'`` - # - # Patch the lookup so the real production code can run unchanged while - # the test routes those requests to the correct local test artifacts. - request_descriptor = kwargs.get("descriptor", request_descriptor) - if request_descriptor is None and data_type == "l0": - return codice_lut_path(descriptor, data_type="l0") - return codice_lut_path(request_descriptor, data_type) - - with patch( - "imap_data_access.processing_input.ProcessingInputCollection.get_file_paths" - ) as mock_get_file_paths: - mock_get_file_paths.side_effect = _lookup_l1_inputs - processed_l1a_file = write_cdf(process_l1a(ProcessingInputCollection())[0]) - processed_l1b = process_codice_l1b(processed_l1a_file) - processed_l1b.attrs["Data_version"] = "001" - return write_cdf(processed_l1b) - - -def _mock_l2_file_paths(descriptor: str, l1b_file, codice_lut_path): - """Return a side effect that points L2 processing at a generated L1B file.""" - - def _side_effect(request_descriptor=None, data_type=None, **kwargs): - request_descriptor = kwargs.get("descriptor", request_descriptor) - if request_descriptor == descriptor: - return [l1b_file] - return codice_lut_path(request_descriptor, data_type) - - return _side_effect - - @pytest.fixture def mock_get_file_paths(codice_lut_path): with patch( @@ -215,34 +161,3 @@ def test_l2_hi_sectored(mock_get_file_paths): sectored_cdf_file.name == f"imap_codice_l2_hi-sectored_{VALIDATION_FILE_DATE}_v001.cdf" ) - - -@pytest.mark.parametrize( - ("descriptor", "efficiency_file"), - [ - ("hi-omni", "imap_codice_l2-hi-omni-efficiency_20251212_v003.csv"), - ("hi-sectored", "imap_codice_l2-hi-sectored-efficiency_20251008_v001.csv"), - ], -) -def 
test_l2_hi_epoch_delta_cdf_metadata(descriptor, efficiency_file, codice_lut_path): - l1b_file = _generate_hi_l1b_file(descriptor, codice_lut_path) - dependencies = ProcessingInputCollection( - AncillaryInput(efficiency_file), - ScienceInput(l1b_file.name), - ) - - with patch( - "imap_data_access.processing_input.ProcessingInputCollection.get_file_paths" - ) as mock_get_file_paths: - mock_get_file_paths.side_effect = _mock_l2_file_paths( - descriptor, l1b_file, codice_lut_path - ) - processed_l2 = process_codice_l2(descriptor, dependencies) - - processed_l2.attrs["Data_version"] = "001" - cdf_file = cdflib.CDF(write_cdf(processed_l2)) - for var in ["epoch_delta_minus", "epoch_delta_plus"]: - var_info = cdf_file.varinq(var) - var_attrs = cdf_file.varattsget(var) - assert var_info.Data_Type_Description == "CDF_DOUBLE" - assert np.isclose(var_attrs["FILLVAL"], np.float64(-1.0e31)) diff --git a/imap_processing/tests/codice/test_codice_l1b.py b/imap_processing/tests/codice/test_codice_l1b.py index 1d22aa2f3..3c4532d8a 100644 --- a/imap_processing/tests/codice/test_codice_l1b.py +++ b/imap_processing/tests/codice/test_codice_l1b.py @@ -2,7 +2,6 @@ from unittest.mock import patch -import cdflib import numpy as np import pytest from imap_data_access import ProcessingInputCollection @@ -116,30 +115,6 @@ def test_l1b_hi_omni(mock_get_file_paths, codice_lut_path): assert cdf_file.name == f"imap_codice_l1b_hi-omni_{VALIDATION_FILE_DATE}_v999.cdf" -@pytest.mark.parametrize("descriptor", ["hi-omni", "hi-sectored"]) -def test_l1b_hi_epoch_delta_cdf_metadata(descriptor, codice_lut_path): - with patch( - "imap_data_access.processing_input.ProcessingInputCollection.get_file_paths" - ) as mock_get_file_paths: - mock_get_file_paths.side_effect = [ - codice_lut_path(descriptor=descriptor, data_type="l0"), - codice_lut_path(descriptor="l1a-sci-lut"), - ] - - l1a_dataset = process_l1a(dependency=ProcessingInputCollection())[0] - l1a_file_path = write_cdf(l1a_dataset) - processed_data = 
process_codice_l1b(file_path=l1a_file_path) - cdf_file_path = write_cdf(processed_data) - - cdf_file = cdflib.CDF(cdf_file_path) - - for var in ["epoch_delta_minus", "epoch_delta_plus"]: - var_info = cdf_file.varinq(var) - var_attrs = cdf_file.varattsget(var) - assert var_info.Data_Type_Description == "CDF_DOUBLE" - assert np.isclose(var_attrs["FILLVAL"], np.float64(-1.0e31)) - - @pytest.mark.xfail(reason="Need to revisit in future PR") @patch("imap_data_access.processing_input.ProcessingInputCollection.get_file_paths") def test_l1b_hi_sectored(mock_get_file_paths, codice_lut_path): From 20e4bfa54092a81b091634077733aa258e2388b6 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 15 May 2026 13:41:53 -0400 Subject: [PATCH 14/14] Update VALIDMAX for esa_energy to 21000.0 in YAML config and tests Margaret says that 21000 is a better VALIDMAX for esa_energy on SWAPI. --- imap_processing/cdf/config/imap_swapi_variable_attrs.yaml | 2 +- imap_processing/tests/swapi/test_swapi_l2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml b/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml index 66b20efd9..a74cb9160 100644 --- a/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_swapi_variable_attrs.yaml @@ -113,7 +113,7 @@ esa_energy: LABL_PTR_1: esa_step_label SCALETYP: linear UNITS: eV / q - VALIDMAX: 65535.0 + VALIDMAX: 21000.0 VALIDMIN: 0.0 VAR_TYPE: support_data diff --git a/imap_processing/tests/swapi/test_swapi_l2.py b/imap_processing/tests/swapi/test_swapi_l2.py index ce94dd777..8189eaa0c 100644 --- a/imap_processing/tests/swapi/test_swapi_l2.py +++ b/imap_processing/tests/swapi/test_swapi_l2.py @@ -131,7 +131,7 @@ def second_get_file_paths_side_effect(descriptor): esa_energy_attrs = cdf_file.varattsget("esa_energy") assert esa_energy_info.Data_Type_Description == "CDF_DOUBLE" assert np.isclose(esa_energy_attrs["FILLVAL"], 
np.float64(-1.0e31)) - assert esa_energy_attrs["VALIDMAX"] == np.float64(65535.0) + assert esa_energy_attrs["VALIDMAX"] == np.float64(21000.0) assert esa_energy_attrs["VALIDMIN"] == np.float64(0.0) # Test uncertainty variables are as expected