From d1bdf2e1db1b3e4379cd008d65ccfb0dbc189a16 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 13 May 2026 23:22:40 -0400 Subject: [PATCH 1/4] fix(glows): write L2 hist labels as strings and normalize global attr typing Clean up the remaining GLOWS L2 histogram metadata issues around label variables and global attribute typing. The current L2 hist write path was creating `bins_label` and `flags_label` as scalar `-1` placeholders with no explicit dimensions. That caused the emitted CDF variables to be written as integer types even though the metadata declares them as string labels. This matches the bug described in issue #2807. Fix this by writing both label variables as real string arrays: - `bins_label` now uses the stringified bin coordinate values with its own `bins_label` dimension, matching the existing L1A/L1B pattern - `flags_label` now uses the ordered bad-time flag names with its own `flags_label` dimension instead of a scalar integer placeholder To keep the label ordering consistent across the GLOWS pipeline, extract the bad-time flag names into a shared `BAD_TIME_FLAG_NAMES` constant and reuse it in both: - `PipelineSettings` bad-time flag extraction - GLOWS L2 `flags_label` construction Update the L2 attrs to match the written string labels: - keep `bins_label FORMAT` as `A4` - widen `flags_label FORMAT` from `A4` to `A42` so the longest shared bad-time flag name fits without truncation Also normalize `flight_software_version` to a string before assigning it as an L2 global attribute. The value content is preserved, but this avoids writing it as a numeric global attribute (`CDF_INT8`), which the validator flags as problematic for this metadata. As part of this work, rechecked `bad_time_flag_occurrences` on current `dev`. It is already being written as `CDF_UINT2`, so no product code change is made there in this commit. Add written-CDF regression coverage for GLOWS L2 hist to verify: - `bins_label` is emitted as `CDF_CHAR` with string bin labels - `flags_label` is emitted as `CDF_CHAR` with the shared bad-time flag names in the expected order - `flags_label FORMAT` remains wide enough for the longest current flag name - `bad_time_flag_occurrences` remains an integer CDF variable - `flight_software_version` is written as a string global attribute This change is intentionally limited to the GLOWS `hist` metadata issues covered by issue #2807 and the remaining related global-attribute typing warning. It does not include the separate GLOWS fill-value fixes for `histogram_flag_array` and `number_of_bins`. --- .../config/imap_glows_l2_variable_attrs.yaml | 2 +- imap_processing/glows/__init__.py | 22 ++++++- imap_processing/glows/l1b/glows_l1b_data.py | 25 +------ imap_processing/glows/l2/glows_l2.py | 38 +++++++++-- imap_processing/tests/glows/test_glows_l2.py | 66 +++++++++++++++++++ 5 files changed, 124 insertions(+), 29 deletions(-) diff --git a/imap_processing/cdf/config/imap_glows_l2_variable_attrs.yaml b/imap_processing/cdf/config/imap_glows_l2_variable_attrs.yaml index 209b93e097..094e3065af 100644 --- a/imap_processing/cdf/config/imap_glows_l2_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_glows_l2_variable_attrs.yaml @@ -14,7 +14,7 @@ flags_label: # CATDESC: L1B flag identifier for daily-ocurrence counter CATDESC: Flag names for daily-occurence counters of L1B flags FIELDNAM: L1B flag name - FORMAT: A4 + FORMAT: A42 VAR_TYPE: metadata default_attrs: &default_attrs diff --git a/imap_processing/glows/__init__.py b/imap_processing/glows/__init__.py index fb62e939ca..20c76623e5 100644 --- a/imap_processing/glows/__init__.py +++ b/imap_processing/glows/__init__.py @@ -1,4 +1,24 @@ __version__ = "v001" +BAD_TIME_FLAG_NAMES = ( + "is_pps_missing", + "is_time_status_missing", + "is_phase_missing", + "is_spin_period_missing", + "is_overexposed", + "is_direct_event_non_monotonic", + "is_night", + "is_hv_test_in_progress", + "is_test_pulse_in_progress", + "is_memory_error_detected", + "is_generated_on_ground", + "is_beyond_daily_statistical_error", + "is_temperature_std_dev_beyond_threshold", + "is_hv_voltage_std_dev_beyond_threshold", + "is_spin_period_std_dev_beyond_threshold", + "is_pulse_length_std_dev_beyond_threshold", + "is_spin_period_difference_beyond_threshold", +) + # Quality flag list length. Used in L1B and L2. -FLAG_LENGTH = 17 +FLAG_LENGTH = len(BAD_TIME_FLAG_NAMES) diff --git a/imap_processing/glows/l1b/glows_l1b_data.py b/imap_processing/glows/l1b/glows_l1b_data.py index 28e570f32b..0f444394f2 100644 --- a/imap_processing/glows/l1b/glows_l1b_data.py +++ b/imap_processing/glows/l1b/glows_l1b_data.py @@ -7,7 +7,7 @@ import xarray as xr from scipy.stats import circmean, circstd -from imap_processing.glows import FLAG_LENGTH +from imap_processing.glows import BAD_TIME_FLAG_NAMES, FLAG_LENGTH from imap_processing.glows.utils.constants import TimeTuple from imap_processing.quality_flags import GLOWSL1bFlags from imap_processing.spice import geometry @@ -135,37 +135,18 @@ def __post_init__(self, pipeline_dataset: xr.Dataset) -> None: self.active_bad_angle_flags = [True, True, True, True] # Extract active bad-time flags (default to all True if not present) - _time_flag_names = [ - "is_pps_missing", - "is_time_status_missing", - "is_phase_missing", - "is_spin_period_missing", - "is_overexposed", - "is_direct_event_non_monotonic", - "is_night", - "is_hv_test_in_progress", - "is_test_pulse_in_progress", - "is_memory_error_detected", - "is_generated_on_ground", - "is_beyond_daily_statistical_error", - "is_temperature_std_dev_beyond_threshold", - "is_hv_voltage_std_dev_beyond_threshold", - "is_spin_period_std_dev_beyond_threshold", - "is_pulse_length_std_dev_beyond_threshold", - "is_spin_period_difference_beyond_threshold", - ] if "active_bad_time_flags" in pipeline_dataset.data_vars: self.active_bad_time_flags = list( pipeline_dataset["active_bad_time_flags"].values ) elif any( f"active_bad_time_flags_{n}" in pipeline_dataset.data_vars - for n in _time_flag_names + for n in BAD_TIME_FLAG_NAMES ): # Flattened format from convert_json_to_dataset self.active_bad_time_flags = [ bool(pipeline_dataset[f"active_bad_time_flags_{name}"].values) - for name in _time_flag_names + for name in BAD_TIME_FLAG_NAMES ] else: # Default: assume all bad-time flags are active diff --git a/imap_processing/glows/l2/glows_l2.py b/imap_processing/glows/l2/glows_l2.py index e6f08e0e9b..977c26a03f 100644 --- a/imap_processing/glows/l2/glows_l2.py +++ b/imap_processing/glows/l2/glows_l2.py @@ -7,7 +7,7 @@ import xarray as xr from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes -from imap_processing.glows import FLAG_LENGTH +from imap_processing.glows import BAD_TIME_FLAG_NAMES, FLAG_LENGTH from imap_processing.glows.l1b.glows_l1b_data import ( PipelineSettings, ) @@ -23,6 +23,32 @@ logger = logging.getLogger(__name__) +def _normalize_global_attr_to_string(value: object) -> str: + """ + Convert a scalar-like global attribute value to a CDF_CHAR-compatible string. + + Parameters + ---------- + value : object + Global attribute value to normalize. + + Returns + ------- + str + String representation suitable for writing as a CDF_CHAR global attribute. + """ + if value is None: + return "" + if isinstance(value, str): + return value + if isinstance(value, (list, tuple, np.ndarray)): + array = np.asarray(value) + if array.size == 0: + return "" + value = array.reshape(-1)[0] + return str(value) + + def glows_l2( input_dataset: xr.Dataset, pipeline_settings_dataset: xr.Dataset, @@ -116,8 +142,9 @@ def create_l2_dataset( ) bins_label = xr.DataArray( - -1, + bins.data.astype(str), name="bins_label", + dims=["bins_label"], attrs=attrs.get_variable_attributes("bins_label", check_schema=False), ) @@ -128,8 +155,9 @@ def create_l2_dataset( ) flags_label = xr.DataArray( - -1, + np.array(BAD_TIME_FLAG_NAMES), name="flags_label", + dims=["flags_label"], attrs=attrs.get_variable_attributes("flags_label", check_schema=False), ) @@ -151,8 +179,8 @@ def create_l2_dataset( attrs=attrs.get_global_attributes("imap_glows_l2_hist"), ) - output.attrs["flight_software_version"] = input_attrs.get( - "flight_software_version", "" + output.attrs["flight_software_version"] = _normalize_global_attr_to_string( + input_attrs.get("flight_software_version", "") ) output.attrs["pkts_file_name"] = input_attrs.get("pkts_file_name", []) diff --git a/imap_processing/tests/glows/test_glows_l2.py b/imap_processing/tests/glows/test_glows_l2.py index b4e40de25f..5b0f623026 100644 --- a/imap_processing/tests/glows/test_glows_l2.py +++ b/imap_processing/tests/glows/test_glows_l2.py @@ -1,10 +1,13 @@ from unittest.mock import patch +import cdflib import numpy as np import pytest import xarray as xr from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes +from imap_processing.cdf.utils import write_cdf +from imap_processing.glows import BAD_TIME_FLAG_NAMES from imap_processing.glows.l1b.glows_l1b import glows_l1b from imap_processing.glows.l1b.glows_l1b_data import ( HistogramL1B, @@ -182,6 +185,69 @@ def test_generate_l2( assert ds.bad_time_flag_occurrences.dtype == np.uint16 +@patch.object(HistogramL2, "compute_position_angle", return_value=42.0) +@patch.object( + HistogramL1B, + "flag_uv_and_excluded", + return_value=(np.zeros(3600, dtype=bool), np.zeros(3600, dtype=bool)), +) +@patch.object(HistogramL1B, "update_spice_parameters", autospec=True) +def test_glows_l2_cdf_metadata( + mock_spice_function, + mock_flag_uv_and_excluded, + mock_compute_position_angle, + l1a_dataset, + mock_ancillary_exclusions, + mock_pipeline_settings, + mock_conversion_table_dict, + mock_ecliptic_bin_centers, + mock_calibration_dataset, +): + """Written GLOWS L2 CDF metadata should match the intended label and attr types.""" + mock_spice_function.side_effect = mock_update_spice_parameters + + l1b_hist_dataset = glows_l1b( + l1a_dataset[0], + mock_ancillary_exclusions.excluded_regions, + mock_ancillary_exclusions.uv_sources, + mock_ancillary_exclusions.suspected_transients, + mock_ancillary_exclusions.exclusions_by_instr_team, + mock_pipeline_settings, + mock_conversion_table_dict, + ) + l1b_hist_dataset.attrs["Repointing"] = "repoint00047" + l2_dataset = glows_l2( + l1b_hist_dataset, mock_pipeline_settings, mock_calibration_dataset + )[0] + cdf_path = write_cdf(l2_dataset) + + cdf_file = cdflib.CDF(cdf_path) + bins_label_info = cdf_file.varinq("bins_label") + bins_label_attrs = cdf_file.varattsget("bins_label") + bins_label_values = cdf_file.varget("bins_label") + flags_label_info = cdf_file.varinq("flags_label") + flags_label_attrs = cdf_file.varattsget("flags_label") + flags_label_values = cdf_file.varget("flags_label") + bad_time_info = cdf_file.varinq("bad_time_flag_occurrences") + bad_time_attrs = cdf_file.varattsget("bad_time_flag_occurrences") + global_attrs = cdf_file.globalattsget() + + assert bins_label_info.Data_Type_Description == "CDF_CHAR" + assert bins_label_attrs["FORMAT"] == "A4" + assert list(bins_label_values[:5]) == ["0", "1", "2", "3", "4"] + + assert flags_label_info.Data_Type_Description == "CDF_CHAR" + assert flags_label_attrs["FORMAT"] == "A42" + assert list(flags_label_values) == list(BAD_TIME_FLAG_NAMES) + assert max(len(name) for name in BAD_TIME_FLAG_NAMES) <= int( + flags_label_attrs["FORMAT"][1:] + ) + + assert bad_time_info.Data_Type_Description == "CDF_UINT2" + assert bad_time_attrs["FORMAT"] == "I5" + assert global_attrs["flight_software_version"] == ["131329"] + + def test_bin_exclusions(l1b_hists): # TODO test excluding bins as well From d377ad10678b121e889f0b0e35ab799f733e7f34 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 13 May 2026 23:32:22 -0400 Subject: [PATCH 2/4] fix(tests): add assertion message for flags_label FORMAT length check --- imap_processing/tests/glows/test_glows_l2.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/imap_processing/tests/glows/test_glows_l2.py b/imap_processing/tests/glows/test_glows_l2.py index 5b0f623026..1ca603e045 100644 --- a/imap_processing/tests/glows/test_glows_l2.py +++ b/imap_processing/tests/glows/test_glows_l2.py @@ -241,6 +241,9 @@ def test_glows_l2_cdf_metadata( assert list(flags_label_values) == list(BAD_TIME_FLAG_NAMES) assert max(len(name) for name in BAD_TIME_FLAG_NAMES) <= int( flags_label_attrs["FORMAT"][1:] + ), ( + "Update flags_label FORMAT in imap_glows_l2_variable_attrs.yaml " + "if a flag name exceeds A42." ) assert bad_time_info.Data_Type_Description == "CDF_UINT2" From 9286722ae7acbe6672d16a80190d5e368348854f Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 14 May 2026 17:52:43 -0400 Subject: [PATCH 3/4] fix(tests): add test for _normalize_global_attr_to_string function Improve coverage for the patch. --- imap_processing/tests/glows/test_glows_l2.py | 22 +++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/imap_processing/tests/glows/test_glows_l2.py b/imap_processing/tests/glows/test_glows_l2.py index 1ca603e045..be795d05c1 100644 --- a/imap_processing/tests/glows/test_glows_l2.py +++ b/imap_processing/tests/glows/test_glows_l2.py @@ -13,7 +13,11 @@ HistogramL1B, PipelineSettings, ) -from imap_processing.glows.l2.glows_l2 import create_l2_dataset, glows_l2 +from imap_processing.glows.l2.glows_l2 import ( + _normalize_global_attr_to_string, + create_l2_dataset, + glows_l2, +) from imap_processing.glows.l2.glows_l2_data import DailyLightcurve, HistogramL2 from imap_processing.glows.utils.constants import GlowsConstants from imap_processing.spice.time import et_to_datetime64, ttj2000ns_to_et @@ -251,6 +255,22 @@ def test_glows_l2_cdf_metadata( assert global_attrs["flight_software_version"] == ["131329"] +@pytest.mark.parametrize( + ("value", "expected"), + [ + (None, ""), + ("131329", "131329"), + ([], ""), + (np.array([]), ""), + ([131329], "131329"), + ((131329,), "131329"), + (np.array([131329]), "131329"), + ], +) +def test_normalize_global_attr_to_string(value, expected): + assert _normalize_global_attr_to_string(value) == expected + + def test_bin_exclusions(l1b_hists): # TODO test excluding bins as well From 0306163ef4c31f92ea27b5e04e59637ebecec882 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 15 May 2026 22:20:32 -0400 Subject: [PATCH 4/4] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- imap_processing/cdf/config/imap_glows_l2_variable_attrs.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imap_processing/cdf/config/imap_glows_l2_variable_attrs.yaml b/imap_processing/cdf/config/imap_glows_l2_variable_attrs.yaml index 094e3065af..05d9b890f0 100644 --- a/imap_processing/cdf/config/imap_glows_l2_variable_attrs.yaml +++ b/imap_processing/cdf/config/imap_glows_l2_variable_attrs.yaml @@ -11,8 +11,8 @@ bins_label: VAR_TYPE: metadata flags_label: -# CATDESC: L1B flag identifier for daily-ocurrence counter - CATDESC: Flag names for daily-occurence counters of L1B flags +# CATDESC: L1B flag identifier for daily-occurrence counter + CATDESC: Flag names for daily-occurrence counters of L1B flags FIELDNAM: L1B flag name FORMAT: A42 VAR_TYPE: metadata