From b3990db4f29019aaeb38470e45a8b2eda51915ea Mon Sep 17 00:00:00 2001 From: kraysent Date: Sun, 24 May 2026 11:18:56 +0100 Subject: [PATCH 1/2] add b plotting to the geometry --- uploader/app/structured/geometry/upload.py | 133 +++++++++++++++------ 1 file changed, 94 insertions(+), 39 deletions(-) diff --git a/uploader/app/structured/geometry/upload.py b/uploader/app/structured/geometry/upload.py index b51c94a..8910aaf 100644 --- a/uploader/app/structured/geometry/upload.py +++ b/uploader/app/structured/geometry/upload.py @@ -33,29 +33,62 @@ "isophote": "mag/arcmin2", } -CHART_FIGSIZE = (8, 6) -N_A_BINS = 80 -A_BIN_MIN = 0.05 -A_BIN_MAX = 500.0 -A_BIN_EDGES = np.logspace(np.log10(A_BIN_MIN), np.log10(A_BIN_MAX), N_A_BINS + 1) - - -class _SemiMajorAxisAccumulator: +CHART_FIGSIZE = (12, 6) +N_AXIS_BINS = 80 +AXIS_BIN_MIN = 0.05 +AXIS_BIN_MAX = 500.0 +AXIS_BIN_EDGES = np.logspace(np.log10(AXIS_BIN_MIN), np.log10(AXIS_BIN_MAX), N_AXIS_BINS + 1) + + +def _positive_histogram(values: list[float]) -> np.ndarray: + if not values: + return np.zeros(N_AXIS_BINS, dtype=np.int64) + positive = np.asarray([v for v in values if v > 0], dtype=np.float64) + if positive.size == 0: + return np.zeros(N_AXIS_BINS, dtype=np.int64) + batch_counts, _ = np.histogram(positive, bins=AXIS_BIN_EDGES) + return batch_counts.astype(np.int64) + + +def _plot_axis_panel( + ax: plt.Axes, + counts: np.ndarray, + *, + xlabel: str, + title: str, + vmin: float | None, + vmax: float | None, + vmean: float | None, +) -> None: + centers = np.sqrt(AXIS_BIN_EDGES[:-1] * AXIS_BIN_EDGES[1:]) + widths = np.diff(AXIS_BIN_EDGES) + ax.bar(centers, counts, width=widths, align="center") + ax.set_xscale("log") + ax.set_yscale("log") + ax.set_xlabel(xlabel) + ax.set_ylabel("Count") + ax.set_title(title) + if vmin is not None and vmin > 0: + ax.axvline(vmin, color="gray", linestyle=":", linewidth=1) + if vmax is not None and vmax > 0: + ax.axvline(vmax, color="gray", linestyle=":", linewidth=1) + if vmean is not None and vmean > 0: + ax.axvline(vmean, color="red", linestyle="--", linewidth=1, label=f"mean={vmean:.2f}") + ax.legend(loc="upper right", fontsize="small") + + +class _GeometryDistributionAccumulator: def __init__(self) -> None: - self._counts = np.zeros(N_A_BINS, dtype=np.int64) + self._a_counts = np.zeros(N_AXIS_BINS, dtype=np.int64) + self._b_counts = np.zeros(N_AXIS_BINS, dtype=np.int64) - def add(self, a_values: list[float]) -> None: - if not a_values: - return - positive = np.asarray([v for v in a_values if v > 0], dtype=np.float64) - if positive.size == 0: - return - batch_counts, _ = np.histogram(positive, bins=A_BIN_EDGES) - self._counts += batch_counts.astype(np.int64) + def add(self, a_values: list[float], b_values: list[float]) -> None: + self._a_counts += _positive_histogram(a_values) + self._b_counts += _positive_histogram(b_values) @property def total(self) -> int: - return int(self._counts.sum()) + return int(self._a_counts.sum()) def emit_image( self, @@ -65,25 +98,32 @@ def emit_image( a_mean: float | None = None, a_min: float | None = None, a_max: float | None = None, + b_mean: float | None = None, + b_min: float | None = None, + b_max: float | None = None, ) -> None: if self.total == 0: return - centers = np.sqrt(A_BIN_EDGES[:-1] * A_BIN_EDGES[1:]) - widths = np.diff(A_BIN_EDGES) - fig, ax = plt.subplots(figsize=CHART_FIGSIZE) - ax.bar(centers, self._counts, width=widths, align="center") - ax.set_xscale("log") - ax.set_yscale("log") - ax.set_xlabel("a (arcsec)") - ax.set_ylabel("Count") - ax.set_title("Semi-major axis distribution") - if a_min is not None and a_min > 0: - ax.axvline(a_min, color="gray", linestyle=":", linewidth=1) - if a_max is not None and a_max > 0: - ax.axvline(a_max, color="gray", linestyle=":", linewidth=1) - if a_mean is not None and a_mean > 0: - ax.axvline(a_mean, color="red", linestyle="--", linewidth=1, label=f"mean={a_mean:.2f}") - ax.legend(loc="upper right", fontsize="small") + fig, (ax_a, ax_b) = plt.subplots(1, 2, figsize=CHART_FIGSIZE) + _plot_axis_panel( + ax_a, + self._a_counts, + xlabel="a (arcsec)", + title="Semi-major axis distribution", + vmin=a_min, + vmax=a_max, + vmean=a_mean, + ) + _plot_axis_panel( + ax_b, + self._b_counts, + xlabel="b (arcsec)", + title="Semi-minor axis distribution", + vmin=b_min, + vmax=b_max, + vmean=b_mean, + ) + fig.tight_layout() report_func(report.image_event_from_figure(fig, caption=caption)) @@ -150,18 +190,22 @@ def upload_geometry_isophotal( a_min = float("inf") a_max = float("-inf") a_sum = 0.0 + b_min = float("inf") + b_max = float("-inf") + b_sum = 0.0 cnt = storage.query( sql.SQL("SELECT COUNT(*) AS cnt FROM rawdata.{}").format(sql.Identifier(table_name)), (), ) total_count = int(cnt[0]["cnt"]) if cnt else 0 processed_rows = 0 - a_dist = _SemiMajorAxisAccumulator() + axis_dist = _GeometryDistributionAccumulator() for rows in rawdata_batches(storage, table_name, sorted(needed_cols), batch_size): batch_ids: list[str] = [] batch_data: list[list[str | float]] = [] batch_a: list[float] = [] + batch_b: list[float] = [] for row in rows: if any(row[col] is None for col in needed_cols): @@ -199,8 +243,13 @@ def upload_geometry_isophotal( a_max = max(a_max, a_val) a_sum += a_val batch_a.append(a_val) + b_val = evaluated["b"] + b_min = min(b_min, b_val) + b_max = max(b_max, b_val) + b_sum += b_val + batch_b.append(b_val) - a_dist.add(batch_a) + axis_dist.add(batch_a, batch_b) if write and batch_ids: handle_call( @@ -225,12 +274,15 @@ def upload_geometry_isophotal( ), ) if uploaded > 0: - a_dist.emit_image( + axis_dist.emit_image( report_func, - caption=f"a distribution: {uploaded} objects", + caption=f"a/b distribution: {uploaded} objects", a_mean=a_sum / uploaded, a_min=a_min, a_max=a_max, + b_mean=b_sum / uploaded, + b_min=b_min, + b_max=b_max, ) total = uploaded + skipped @@ -253,12 +305,15 @@ def row_pct_label(n: int) -> float: ) report_func(report.ProgressEvent(percent=100)) if uploaded > 0: - a_dist.emit_image( + axis_dist.emit_image( report_func, caption=f"Final: {uploaded} objects", a_mean=a_sum / uploaded, a_min=a_min, a_max=a_max, + b_mean=b_sum / uploaded, + b_min=b_min, + b_max=b_max, ) summary = format_table( ("Status", "Count", "%"), From 4efa5d9250c6ee834473d8539b87c639e09984a0 Mon Sep 17 00:00:00 2001 From: kraysent Date: Sun, 24 May 2026 11:44:07 +0100 Subject: [PATCH 2/2] support log units --- tests/test_expression.py | 117 +++++++++++++++++++++++++++++++++ uploader/app/lib/expression.py | 61 +++++++++++++---- 2 files changed, 166 insertions(+), 12 deletions(-) create mode 100644 tests/test_expression.py diff --git a/tests/test_expression.py b/tests/test_expression.py new file mode 100644 index 0000000..3165cd9 --- /dev/null +++ b/tests/test_expression.py @@ -0,0 +1,117 @@ +import astropy.units as u + +from uploader.app.lib.expression import parse + + +def _sample_values() -> tuple[dict[str, float], dict[str, str]]: + values = { + "logd25": 1.5, + "logr25": 0.3, + "e_logd25": 0.05, + "e_logr25": 0.04, + "pa": 190.0, + } + units = { + "logd25": "", + "logr25": "", + "e_logd25": "", + "e_logr25": "", + "pa": "deg", + } + return values, units + + +def test_isophotal_axis_expressions() -> None: + values, units = _sample_values() + a = parse('3 * 10 ** col("logd25") * arcsec').evaluate(values, units) + assert a.unit == u.arcsec + assert abs(a.value - 94.86832980505137) < 1e-6 + + e_a = parse('3 * 10 ** col("logd25") * 2.302585093 * e_logd25 * arcsec').evaluate(values, units) + assert e_a.unit == u.arcsec + assert e_a.value > 0 + + b = parse('3 * 10 ** (col("logd25") - col("logr25")) * arcsec').evaluate(values, units) + assert b.unit == u.arcsec + assert b.value > 0 + + e_b = parse( + '3 * 10 ** (col("logd25") - col("logr25")) * 2.302585093 ' + '* (col("e_logd25") ** 2 + col("e_logr25") ** 2) ** 0.5 * arcsec', + ).evaluate(values, units) + assert e_b.unit == u.arcsec + assert e_b.value > 0 + + +def test_position_angle_modulo() -> None: + values, units = _sample_values() + pa = parse('col("pa") % 180.0').evaluate(values, units) + assert pa.unit == u.deg + assert pa.value == 10.0 + + +def test_isophotal_axis_expressions_with_logarithmic_column_units() -> None: + values, units = _sample_values() + for log_unit in ("mag", "dex"): + units_with_log = {**units, "logd25": log_unit, "logr25": log_unit, "e_logd25": log_unit, "e_logr25": log_unit} + a = parse('3 * 10 ** col("logd25") * arcsec').evaluate(values, units_with_log) + assert a.unit == u.arcsec + assert abs(a.value - 94.86832980505137) < 1e-6 + + e_a = parse('3 * 10 ** col("logd25") * 2.302585093 * e_logd25 * arcsec').evaluate(values, units_with_log) + assert e_a.unit == u.arcsec + assert e_a.value > 0 + + e_b = parse( + '3 * 10 ** (col("logd25") - col("logr25")) * 2.302585093 ' + '* (col("e_logd25") ** 2 + col("e_logr25") ** 2) ** 0.5 * arcsec', + ).evaluate(values, units_with_log) + assert e_b.unit == u.arcsec + assert e_b.value > 0 + + +def test_isophotal_axis_expressions_with_hyperleda_units() -> None: + values = { + "logd25": 0.697, + "logr25": 0.13, + "e_logd25": 0.079, + "e_logr25": 0.028, + "pa": 161.14, + } + units = { + "logd25": "dex(0.1 arcmin)", + "logr25": "dex", + "e_logd25": "dex(0.1 arcmin)", + "e_logr25": "dex", + "pa": "deg", + } + a = parse('3 * 10 ** col("logd25") * arcsec').evaluate(values, units) + assert a.unit == u.arcsec + assert a.value > 0 + + e_a = parse('3 * 10 ** col("logd25") * 2.302585093 * e_logd25 * arcsec').evaluate(values, units) + assert e_a.unit == u.arcsec + assert e_a.value > 0 + + b = parse('3 * 10 ** (col("logd25") - col("logr25")) * arcsec').evaluate(values, units) + assert b.unit == u.arcsec + assert b.value > 0 + + e_b = parse( + '3 * 10 ** (col("logd25") - col("logr25")) * 2.302585093 ' + '* (col("e_logd25") ** 2 + col("e_logr25") ** 2) ** 0.5 * arcsec', + ).evaluate(values, units) + assert e_b.unit == u.arcsec + assert e_b.value > 0 + + +def test_surface_brightness_column_keeps_units() -> None: + values = {"bri25": 23.162} + units = {"bri25": "mag / arcsec2"} + bri25 = parse('col("bri25")').evaluate(values, units) + assert bri25.unit == u.Unit("mag/arcsec2") + + +def test_bare_column_referenced_in_parse() -> None: + expr = parse("3 * 10 ** col('logd25') * 2.302585093 * e_logd25 * arcsec") + assert expr.referenced_columns == {"logd25", "e_logd25"} diff --git a/uploader/app/lib/expression.py b/uploader/app/lib/expression.py index 3c55e4b..ac65b15 100644 --- a/uploader/app/lib/expression.py +++ b/uploader/app/lib/expression.py @@ -7,6 +7,7 @@ import astropy.constants as const import astropy.units as u import numpy as np +from astropy.units.function.core import FunctionUnitBase COL_FUNCTION = "col" @@ -24,10 +25,10 @@ def expression_syntax_help() -> str: constants = ", ".join(sorted(NAMED_CONSTANTS)) return ( - f'Use {COL_FUNCTION}("name") to refer to rawdata columns ' - '(e.g. col("a"), col("SMASB22.5"), col("PA-LEDA")).\n' - "Bare identifiers refer to predefined constants.\n" - "Operators: + - * /.\n" + f'Use {COL_FUNCTION}("name") or bare identifiers to refer to rawdata columns ' + '(e.g. col("a"), e_logd25).\n' + "Bare identifiers that match predefined constants use those constants.\n" + "Operators: + - * / ** %.\n" "Functions: sin(x), cos(x) (argument must be an angle).\n" "Numbers are dimensionless.\n" f"Available constants: {constants}." @@ -38,11 +39,44 @@ def expression_syntax_help() -> str: type _QuantityUnaryOp = Callable[[u.Quantity], u.Quantity] type _QuantityFunc = Callable[[u.Quantity], u.Quantity | float] + +def _mod(left: u.Quantity, right: u.Quantity) -> u.Quantity: + if not right.unit.is_equivalent(u.dimensionless_unscaled): + raise ValueError("modulo divisor must be dimensionless") + return (left.value % right.value) * left.unit + + +def _is_logarithmic_column_unit(unit: u.Unit) -> bool: + return unit == u.mag or unit == u.dex or isinstance(unit, FunctionUnitBase) + + +def _column_quantity(value: float, unit_str: str) -> u.Quantity: + if not unit_str: + return value * u.dimensionless_unscaled + unit = u.Unit(unit_str) + if _is_logarithmic_column_unit(unit): + return value * u.dimensionless_unscaled + return value * unit + + +def _pow(base: u.Quantity, exp: u.Quantity) -> u.Quantity: + if not exp.unit.is_equivalent(u.dimensionless_unscaled): + exp = float(exp.value) * u.dimensionless_unscaled + try: + return operator.pow(base, exp) + except u.UnitTypeError: + if base.unit.is_equivalent(u.dimensionless_unscaled): + return float(base.value**exp.value) * u.dimensionless_unscaled + raise + + _BINOPS: dict[type[ast.operator], _QuantityBinOp] = { ast.Add: operator.add, ast.Sub: operator.sub, ast.Mult: operator.mul, ast.Div: operator.truediv, + ast.Pow: _pow, + ast.Mod: _mod, } _UNARYOPS: dict[type[ast.unaryop], _QuantityUnaryOp] = { @@ -104,6 +138,11 @@ def visit_Call(self, node: ast.Call) -> None: for arg in node.args: self.visit(arg) + def visit_Name(self, node: ast.Name) -> None: + if node.id in NAMED_CONSTANTS or node.id in _FUNCTIONS: + return + self.columns.add(node.id) + @final class _Evaluator(ast.NodeVisitor): @@ -120,7 +159,7 @@ def visit(self, node: ast.AST) -> u.Quantity: case ast.Call() as call: return self._call(call) case ast.Name(id=name): - return self._lookup_constant(name) + return self._lookup_name(name) case ast.Constant(value=value): return self._constant(value) case _: @@ -157,18 +196,16 @@ def _call(self, node: ast.Call) -> u.Quantity: return result return float(result) * u.dimensionless_unscaled - def _lookup_constant(self, name: str) -> u.Quantity: + def _lookup_name(self, name: str) -> u.Quantity: constant = NAMED_CONSTANTS.get(name) - if constant is None: - raise ValueError(f"unknown constant {name!r}") - return constant + if constant is not None: + return constant + return self._lookup_column(name) def _lookup_column(self, name: str) -> u.Quantity: if name not in self._values: raise ValueError(f"unknown column {name!r}") - unit_str = self._units.get(name, "") - unit = u.Unit(unit_str) if unit_str else u.dimensionless_unscaled - return self._values[name] * unit + return _column_quantity(self._values[name], self._units.get(name, "")) def _constant(self, value: object) -> u.Quantity: if isinstance(value, bool):