From b3990db4f29019aaeb38470e45a8b2eda51915ea Mon Sep 17 00:00:00 2001
From: kraysent <kraysent@gmail.com>
Date: Sun, 24 May 2026 11:18:56 +0100
Subject: [PATCH 1/2] add b plotting to the geometry

---
 uploader/app/structured/geometry/upload.py | 133 +++++++++++++++------
 1 file changed, 94 insertions(+), 39 deletions(-)

diff --git a/uploader/app/structured/geometry/upload.py b/uploader/app/structured/geometry/upload.py
index b51c94a..8910aaf 100644
--- a/uploader/app/structured/geometry/upload.py
+++ b/uploader/app/structured/geometry/upload.py
@@ -33,29 +33,62 @@
     "isophote": "mag/arcmin2",
 }
 
-CHART_FIGSIZE = (8, 6)
-N_A_BINS = 80
-A_BIN_MIN = 0.05
-A_BIN_MAX = 500.0
-A_BIN_EDGES = np.logspace(np.log10(A_BIN_MIN), np.log10(A_BIN_MAX), N_A_BINS + 1)
-
-
-class _SemiMajorAxisAccumulator:
+CHART_FIGSIZE = (12, 6)
+N_AXIS_BINS = 80
+AXIS_BIN_MIN = 0.05
+AXIS_BIN_MAX = 500.0
+AXIS_BIN_EDGES = np.logspace(np.log10(AXIS_BIN_MIN), np.log10(AXIS_BIN_MAX), N_AXIS_BINS + 1)
+
+
+def _positive_histogram(values: list[float]) -> np.ndarray:
+    if not values:
+        return np.zeros(N_AXIS_BINS, dtype=np.int64)
+    positive = np.asarray([v for v in values if v > 0], dtype=np.float64)
+    if positive.size == 0:
+        return np.zeros(N_AXIS_BINS, dtype=np.int64)
+    batch_counts, _ = np.histogram(positive, bins=AXIS_BIN_EDGES)
+    return batch_counts.astype(np.int64)
+
+
+def _plot_axis_panel(
+    ax: plt.Axes,
+    counts: np.ndarray,
+    *,
+    xlabel: str,
+    title: str,
+    vmin: float | None,
+    vmax: float | None,
+    vmean: float | None,
+) -> None:
+    centers = np.sqrt(AXIS_BIN_EDGES[:-1] * AXIS_BIN_EDGES[1:])
+    widths = np.diff(AXIS_BIN_EDGES)
+    ax.bar(centers, counts, width=widths, align="center")
+    ax.set_xscale("log")
+    ax.set_yscale("log")
+    ax.set_xlabel(xlabel)
+    ax.set_ylabel("Count")
+    ax.set_title(title)
+    if vmin is not None and vmin > 0:
+        ax.axvline(vmin, color="gray", linestyle=":", linewidth=1)
+    if vmax is not None and vmax > 0:
+        ax.axvline(vmax, color="gray", linestyle=":", linewidth=1)
+    if vmean is not None and vmean > 0:
+        ax.axvline(vmean, color="red", linestyle="--", linewidth=1, label=f"mean={vmean:.2f}")
+        ax.legend(loc="upper right", fontsize="small")
+
+
+class _GeometryDistributionAccumulator:
     def __init__(self) -> None:
-        self._counts = np.zeros(N_A_BINS, dtype=np.int64)
+        self._a_counts = np.zeros(N_AXIS_BINS, dtype=np.int64)
+        self._b_counts = np.zeros(N_AXIS_BINS, dtype=np.int64)
 
-    def add(self, a_values: list[float]) -> None:
-        if not a_values:
-            return
-        positive = np.asarray([v for v in a_values if v > 0], dtype=np.float64)
-        if positive.size == 0:
-            return
-        batch_counts, _ = np.histogram(positive, bins=A_BIN_EDGES)
-        self._counts += batch_counts.astype(np.int64)
+    def add(self, a_values: list[float], b_values: list[float]) -> None:
+        self._a_counts += _positive_histogram(a_values)
+        self._b_counts += _positive_histogram(b_values)
 
     @property
     def total(self) -> int:
-        return int(self._counts.sum())
+        return int(self._a_counts.sum())
 
     def emit_image(
         self,
@@ -65,25 +98,32 @@ def emit_image(
         a_mean: float | None = None,
         a_min: float | None = None,
         a_max: float | None = None,
+        b_mean: float | None = None,
+        b_min: float | None = None,
+        b_max: float | None = None,
     ) -> None:
         if self.total == 0:
             return
-        centers = np.sqrt(A_BIN_EDGES[:-1] * A_BIN_EDGES[1:])
-        widths = np.diff(A_BIN_EDGES)
-        fig, ax = plt.subplots(figsize=CHART_FIGSIZE)
-        ax.bar(centers, self._counts, width=widths, align="center")
-        ax.set_xscale("log")
-        ax.set_yscale("log")
-        ax.set_xlabel("a (arcsec)")
-        ax.set_ylabel("Count")
-        ax.set_title("Semi-major axis distribution")
-        if a_min is not None and a_min > 0:
-            ax.axvline(a_min, color="gray", linestyle=":", linewidth=1)
-        if a_max is not None and a_max > 0:
-            ax.axvline(a_max, color="gray", linestyle=":", linewidth=1)
-        if a_mean is not None and a_mean > 0:
-            ax.axvline(a_mean, color="red", linestyle="--", linewidth=1, label=f"mean={a_mean:.2f}")
-            ax.legend(loc="upper right", fontsize="small")
+        fig, (ax_a, ax_b) = plt.subplots(1, 2, figsize=CHART_FIGSIZE)
+        _plot_axis_panel(
+            ax_a,
+            self._a_counts,
+            xlabel="a (arcsec)",
+            title="Semi-major axis distribution",
+            vmin=a_min,
+            vmax=a_max,
+            vmean=a_mean,
+        )
+        _plot_axis_panel(
+            ax_b,
+            self._b_counts,
+            xlabel="b (arcsec)",
+            title="Semi-minor axis distribution",
+            vmin=b_min,
+            vmax=b_max,
+            vmean=b_mean,
+        )
+        fig.tight_layout()
         report_func(report.image_event_from_figure(fig, caption=caption))
 
 
@@ -150,18 +190,22 @@ def upload_geometry_isophotal(
     a_min = float("inf")
     a_max = float("-inf")
     a_sum = 0.0
+    b_min = float("inf")
+    b_max = float("-inf")
+    b_sum = 0.0
     cnt = storage.query(
         sql.SQL("SELECT COUNT(*) AS cnt FROM rawdata.{}").format(sql.Identifier(table_name)),
         (),
     )
     total_count = int(cnt[0]["cnt"]) if cnt else 0
     processed_rows = 0
-    a_dist = _SemiMajorAxisAccumulator()
+    axis_dist = _GeometryDistributionAccumulator()
 
     for rows in rawdata_batches(storage, table_name, sorted(needed_cols), batch_size):
         batch_ids: list[str] = []
         batch_data: list[list[str | float]] = []
         batch_a: list[float] = []
+        batch_b: list[float] = []
 
         for row in rows:
             if any(row[col] is None for col in needed_cols):
@@ -199,8 +243,13 @@ def upload_geometry_isophotal(
             a_max = max(a_max, a_val)
             a_sum += a_val
             batch_a.append(a_val)
+            b_val = evaluated["b"]
+            b_min = min(b_min, b_val)
+            b_max = max(b_max, b_val)
+            b_sum += b_val
+            batch_b.append(b_val)
 
-        a_dist.add(batch_a)
+        axis_dist.add(batch_a, batch_b)
 
         if write and batch_ids:
             handle_call(
@@ -225,12 +274,15 @@ def upload_geometry_isophotal(
             ),
         )
         if uploaded > 0:
-            a_dist.emit_image(
+            axis_dist.emit_image(
                 report_func,
-                caption=f"a distribution: {uploaded} objects",
+                caption=f"a/b distribution: {uploaded} objects",
                 a_mean=a_sum / uploaded,
                 a_min=a_min,
                 a_max=a_max,
+                b_mean=b_sum / uploaded,
+                b_min=b_min,
+                b_max=b_max,
             )
 
     total = uploaded + skipped
@@ -253,12 +305,15 @@ def row_pct_label(n: int) -> float:
         )
     report_func(report.ProgressEvent(percent=100))
     if uploaded > 0:
-        a_dist.emit_image(
+        axis_dist.emit_image(
             report_func,
             caption=f"Final: {uploaded} objects",
             a_mean=a_sum / uploaded,
             a_min=a_min,
             a_max=a_max,
+            b_mean=b_sum / uploaded,
+            b_min=b_min,
+            b_max=b_max,
         )
     summary = format_table(
         ("Status", "Count", "%"),

From 4efa5d9250c6ee834473d8539b87c639e09984a0 Mon Sep 17 00:00:00 2001
From: kraysent <kraysent@gmail.com>
Date: Sun, 24 May 2026 11:44:07 +0100
Subject: [PATCH 2/2] support log units

---
 tests/test_expression.py       | 117 +++++++++++++++++++++++++++++++++
 uploader/app/lib/expression.py |  61 +++++++++++++----
 2 files changed, 166 insertions(+), 12 deletions(-)
 create mode 100644 tests/test_expression.py

diff --git a/tests/test_expression.py b/tests/test_expression.py
new file mode 100644
index 0000000..3165cd9
--- /dev/null
+++ b/tests/test_expression.py
@@ -0,0 +1,117 @@
+import astropy.units as u
+
+from uploader.app.lib.expression import parse
+
+
+def _sample_values() -> tuple[dict[str, float], dict[str, str]]:
+    values = {
+        "logd25": 1.5,
+        "logr25": 0.3,
+        "e_logd25": 0.05,
+        "e_logr25": 0.04,
+        "pa": 190.0,
+    }
+    units = {
+        "logd25": "",
+        "logr25": "",
+        "e_logd25": "",
+        "e_logr25": "",
+        "pa": "deg",
+    }
+    return values, units
+
+
+def test_isophotal_axis_expressions() -> None:
+    values, units = _sample_values()
+    a = parse('3 * 10 ** col("logd25") * arcsec').evaluate(values, units)
+    assert a.unit == u.arcsec
+    assert abs(a.value - 94.86832980505137) < 1e-6
+
+    e_a = parse('3 * 10 ** col("logd25") * 2.302585093 * e_logd25 * arcsec').evaluate(values, units)
+    assert e_a.unit == u.arcsec
+    assert e_a.value > 0
+
+    b = parse('3 * 10 ** (col("logd25") - col("logr25")) * arcsec').evaluate(values, units)
+    assert b.unit == u.arcsec
+    assert b.value > 0
+
+    e_b = parse(
+        '3 * 10 ** (col("logd25") - col("logr25")) * 2.302585093 '
+        '* (col("e_logd25") ** 2 + col("e_logr25") ** 2) ** 0.5 * arcsec',
+    ).evaluate(values, units)
+    assert e_b.unit == u.arcsec
+    assert e_b.value > 0
+
+
+def test_position_angle_modulo() -> None:
+    values, units = _sample_values()
+    pa = parse('col("pa") % 180.0').evaluate(values, units)
+    assert pa.unit == u.deg
+    assert pa.value == 10.0
+
+
+def test_isophotal_axis_expressions_with_logarithmic_column_units() -> None:
+    values, units = _sample_values()
+    for log_unit in ("mag", "dex"):
+        units_with_log = {**units, "logd25": log_unit, "logr25": log_unit, "e_logd25": log_unit, "e_logr25": log_unit}
+        a = parse('3 * 10 ** col("logd25") * arcsec').evaluate(values, units_with_log)
+        assert a.unit == u.arcsec
+        assert abs(a.value - 94.86832980505137) < 1e-6
+
+        e_a = parse('3 * 10 ** col("logd25") * 2.302585093 * e_logd25 * arcsec').evaluate(values, units_with_log)
+        assert e_a.unit == u.arcsec
+        assert e_a.value > 0
+
+        e_b = parse(
+            '3 * 10 ** (col("logd25") - col("logr25")) * 2.302585093 '
+            '* (col("e_logd25") ** 2 + col("e_logr25") ** 2) ** 0.5 * arcsec',
+        ).evaluate(values, units_with_log)
+        assert e_b.unit == u.arcsec
+        assert e_b.value > 0
+
+
+def test_isophotal_axis_expressions_with_hyperleda_units() -> None:
+    values = {
+        "logd25": 0.697,
+        "logr25": 0.13,
+        "e_logd25": 0.079,
+        "e_logr25": 0.028,
+        "pa": 161.14,
+    }
+    units = {
+        "logd25": "dex(0.1 arcmin)",
+        "logr25": "dex",
+        "e_logd25": "dex(0.1 arcmin)",
+        "e_logr25": "dex",
+        "pa": "deg",
+    }
+    a = parse('3 * 10 ** col("logd25") * arcsec').evaluate(values, units)
+    assert a.unit == u.arcsec
+    assert a.value > 0
+
+    e_a = parse('3 * 10 ** col("logd25") * 2.302585093 * e_logd25 * arcsec').evaluate(values, units)
+    assert e_a.unit == u.arcsec
+    assert e_a.value > 0
+
+    b = parse('3 * 10 ** (col("logd25") - col("logr25")) * arcsec').evaluate(values, units)
+    assert b.unit == u.arcsec
+    assert b.value > 0
+
+    e_b = parse(
+        '3 * 10 ** (col("logd25") - col("logr25")) * 2.302585093 '
+        '* (col("e_logd25") ** 2 + col("e_logr25") ** 2) ** 0.5 * arcsec',
+    ).evaluate(values, units)
+    assert e_b.unit == u.arcsec
+    assert e_b.value > 0
+
+
+def test_surface_brightness_column_keeps_units() -> None:
+    values = {"bri25": 23.162}
+    units = {"bri25": "mag / arcsec2"}
+    bri25 = parse('col("bri25")').evaluate(values, units)
+    assert bri25.unit == u.Unit("mag/arcsec2")
+
+
+def test_bare_column_referenced_in_parse() -> None:
+    expr = parse("3 * 10 ** col('logd25') * 2.302585093 * e_logd25 * arcsec")
+    assert expr.referenced_columns == {"logd25", "e_logd25"}
diff --git a/uploader/app/lib/expression.py b/uploader/app/lib/expression.py
index 3c55e4b..ac65b15 100644
--- a/uploader/app/lib/expression.py
+++ b/uploader/app/lib/expression.py
@@ -7,6 +7,7 @@
 import astropy.constants as const
 import astropy.units as u
 import numpy as np
+from astropy.units.function.core import FunctionUnitBase
 
 COL_FUNCTION = "col"
 
@@ -24,10 +25,10 @@
 def expression_syntax_help() -> str:
     constants = ", ".join(sorted(NAMED_CONSTANTS))
     return (
-        f'Use {COL_FUNCTION}("name") to refer to rawdata columns '
-        '(e.g. col("a"), col("SMASB22.5"), col("PA-LEDA")).\n'
-        "Bare identifiers refer to predefined constants.\n"
-        "Operators: + - * /.\n"
+        f'Use {COL_FUNCTION}("name") or bare identifiers to refer to rawdata columns '
+        '(e.g. col("a"), e_logd25).\n'
+        "Bare identifiers that match predefined constants use those constants.\n"
+        "Operators: + - * / ** %.\n"
         "Functions: sin(x), cos(x) (argument must be an angle).\n"
         "Numbers are dimensionless.\n"
         f"Available constants: {constants}."
@@ -38,11 +39,44 @@ def expression_syntax_help() -> str:
 type _QuantityUnaryOp = Callable[[u.Quantity], u.Quantity]
 type _QuantityFunc = Callable[[u.Quantity], u.Quantity | float]
 
+
+def _mod(left: u.Quantity, right: u.Quantity) -> u.Quantity:
+    if not right.unit.is_equivalent(u.dimensionless_unscaled):
+        raise ValueError("modulo divisor must be dimensionless")
+    return (left.value % right.value) * left.unit
+
+
+def _is_logarithmic_column_unit(unit: u.Unit) -> bool:
+    return unit == u.mag or unit == u.dex or isinstance(unit, FunctionUnitBase)
+
+
+def _column_quantity(value: float, unit_str: str) -> u.Quantity:
+    if not unit_str:
+        return value * u.dimensionless_unscaled
+    unit = u.Unit(unit_str)
+    if _is_logarithmic_column_unit(unit):
+        return value * u.dimensionless_unscaled
+    return value * unit
+
+
+def _pow(base: u.Quantity, exp: u.Quantity) -> u.Quantity:
+    if not exp.unit.is_equivalent(u.dimensionless_unscaled):
+        exp = float(exp.value) * u.dimensionless_unscaled
+    try:
+        return operator.pow(base, exp)
+    except u.UnitTypeError:
+        if base.unit.is_equivalent(u.dimensionless_unscaled):
+            return float(base.value**exp.value) * u.dimensionless_unscaled
+        raise
+
+
 _BINOPS: dict[type[ast.operator], _QuantityBinOp] = {
     ast.Add: operator.add,
     ast.Sub: operator.sub,
     ast.Mult: operator.mul,
     ast.Div: operator.truediv,
+    ast.Pow: _pow,
+    ast.Mod: _mod,
 }
 
 _UNARYOPS: dict[type[ast.unaryop], _QuantityUnaryOp] = {
@@ -104,6 +138,11 @@ def visit_Call(self, node: ast.Call) -> None:
         for arg in node.args:
             self.visit(arg)
 
+    def visit_Name(self, node: ast.Name) -> None:
+        if node.id in NAMED_CONSTANTS or node.id in _FUNCTIONS:
+            return
+        self.columns.add(node.id)
+
 
 @final
 class _Evaluator(ast.NodeVisitor):
@@ -120,7 +159,7 @@ def visit(self, node: ast.AST) -> u.Quantity:
             case ast.Call() as call:
                 return self._call(call)
             case ast.Name(id=name):
-                return self._lookup_constant(name)
+                return self._lookup_name(name)
             case ast.Constant(value=value):
                 return self._constant(value)
             case _:
@@ -157,18 +196,16 @@ def _call(self, node: ast.Call) -> u.Quantity:
             return result
         return float(result) * u.dimensionless_unscaled
 
-    def _lookup_constant(self, name: str) -> u.Quantity:
+    def _lookup_name(self, name: str) -> u.Quantity:
         constant = NAMED_CONSTANTS.get(name)
-        if constant is None:
-            raise ValueError(f"unknown constant {name!r}")
-        return constant
+        if constant is not None:
+            return constant
+        return self._lookup_column(name)
 
     def _lookup_column(self, name: str) -> u.Quantity:
         if name not in self._values:
             raise ValueError(f"unknown column {name!r}")
-        unit_str = self._units.get(name, "")
-        unit = u.Unit(unit_str) if unit_str else u.dimensionless_unscaled
-        return self._values[name] * unit
+        return _column_quantity(self._values[name], self._units.get(name, ""))
 
     def _constant(self, value: object) -> u.Quantity:
         if isinstance(value, bool):