diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..410d1be --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,9 @@ +^spatial-modeling-with-claude$ +^_pkgdown\.yml$ +^pkgdown$ +^docs$ +^\.github$ +^.*\.Rproj$ +^\.Rproj\.user$ +^README\.Rmd$ +^LICENSE\.md$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..d855493 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,50 @@ +# R CMD check workflow for TissueField +# Runs on ubuntu (release + devel), macos, and windows. + +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..e9000a3 --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,48 @@ +# pkgdown site deployment for TissueField +# Deploys to gh-pages on push to main/master. + +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +jobs: + pkgdown: + runs-on: ubuntu-latest + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.5.0 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..7919c2c --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,34 @@ +Package: TissueField +Title: Steady-State Molecular Concentration Fields for Spatial Transcriptomics +Version: 0.1.0 +Authors@R: person("Raredon Laboratory", role = c("aut", "cre"), + email = "raredon@yale.edu") +Description: Computes continuous steady-state molecular concentration fields + from discrete mRNA transcript or protein detection coordinates, using a + physically motivated diffusion-clearance model. Solves the screened + Poisson PDE (D * nabla^2 C - lambda * C + s = 0) via one of three + methods: finite-difference sparse linear system, Green's function FFT + convolution, or Gaussian kernel density estimation. Designed for use + with spatial transcriptomics and spatial proteomics data. Works + naturally with tissue masks produced by the TissueMask package. +License: MIT + file LICENSE +Encoding: UTF-8 +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.3.3 +Depends: R (>= 4.1.0) +Imports: + sf (>= 1.0-0), + Matrix (>= 1.3-0), + stats, + parallel +Suggests: + ggplot2, + patchwork, + testthat (>= 3.0.0), + knitr, + rmarkdown, + TissueMask +Config/testthat/edition: 3 +VignetteBuilder: knitr +URL: https://github.com/RaredonLab/TissueField +BugReports: https://github.com/RaredonLab/TissueField/issues diff --git a/LICENSE b/LICENSE index 41f0f84..a46a28d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,2 @@ -MIT License - -Copyright (c) 2026 Raredon Lab - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +YEAR: 2026 +COPYRIGHT HOLDER: Raredon Laboratory diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..3fb6ede --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +# MIT License + +Copyright (c) 2026 Raredon Laboratory + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..0cb9e09 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,13 @@ +# Generated by roxygen2: do not edit by hand + +export(estimate_concentration_field) +export(field_to_df) +export(plot_concentration_field) +export(sweep_diffusion_length) +importFrom(sf,st_as_sf) +importFrom(sf,st_bbox) +importFrom(sf,st_crs) +importFrom(sf,st_union) +importFrom(sf,st_within) +importFrom(stats,complete.cases) +importFrom(stats,filter) diff --git a/R/TissueField-package.R b/R/TissueField-package.R new file mode 100644 index 0000000..a447eba --- /dev/null +++ b/R/TissueField-package.R @@ -0,0 +1,14 @@ +#' TissueField: Steady-State Molecular Concentration Fields for Spatial Transcriptomics +#' +#' Computes continuous steady-state molecular concentration fields from discrete +#' mRNA transcript or protein detection coordinates using a physically motivated +#' diffusion-clearance model. The screened Poisson PDE is solved numerically +#' using one of three methods: finite-difference sparse linear system (`"fd"`), +#' Green's function FFT convolution (`"green"`), or Gaussian kernel density +#' (`"kde"`). +#' +#' @keywords internal +"_PACKAGE" + +## Suppress R CMD check NOTE for bare variable names used in ggplot2 aes() +utils::globalVariables(c("x", "y", "field", ".data")) diff --git a/R/estimate_concentration_field.R b/R/estimate_concentration_field.R new file mode 100644 index 0000000..5a29e11 --- /dev/null +++ b/R/estimate_concentration_field.R @@ -0,0 +1,594 @@ +# ============================================================ +# estimate_concentration_field.R +# +# Models steady-state continuous molecular concentration over a +# spatial mask from individual mRNA transcript point sources. +# +# Physical model: D * nabla^2 C(x) - lambda * C(x) + s(x) = 0 +# +# Three solvers: +# "fd" -- finite-difference sparse linear system (recommended) +# "green" -- FFT convolution with 2D Green's function K0(kappa*r) +# "kde" -- Gaussian kernel smoothing (phenomenological baseline) +# ============================================================ + +#' Estimate a steady-state molecular concentration field +#' +#' Solves the screened Poisson (diffusion-clearance) PDE over a spatial mask +#' to produce a continuous concentration field from discrete transcript or +#' protein detection coordinates: +#' +#' \deqn{D \nabla^2 C(\mathbf{x}) - \lambda C(\mathbf{x}) + s(\mathbf{x}) = 0} +#' +#' The key derived quantity is the **diffusion length** \eqn{L = \sqrt{D/\lambda}}, +#' which sets the characteristic decay distance from each source. Three solvers +#' are available: `"fd"` (finite-difference, recommended), `"green"` (Green's +#' function via FFT), and `"kde"` (Gaussian kernel density, phenomenological). +#' +#' @param mask An `sfc` or `sf` polygon object defining the tissue boundary. +#' Typically the output of `TissueMask::fit_spatial_mask()`. Multi-polygon +#' objects (holes, islands) are supported. +#' @param transcript_coords A `data.frame` or matrix with columns `x` and `y` +#' (or first two columns used if names differ) giving transcript or detection +#' coordinates in the same coordinate system as `mask`. +#' @param D Diffusion coefficient (arbitrary spatial units squared per time). +#' Controls the amplitude of the field (\eqn{C \propto 1/D}); the spatial +#' pattern depends only on `diffusion_length`. Default `1.0`. +#' @param lambda First-order clearance rate. Must be `> 0` unless +#' `diffusion_length` is supplied. Default `0.1`. +#' @param production_rate Scalar multiplier applied to all source weights. +#' If `weight_col` is `NULL` each transcript contributes `production_rate` +#' units of source strength. Default `1.0`. +#' @param diffusion_length Optional. If supplied, overrides `D` and `lambda` +#' and sets \eqn{L} directly. Useful for sweeping the spatial scale without +#' changing amplitude. +#' @param weight_col Optional character string naming a column in +#' `transcript_coords` to use as per-transcript weights (e.g. `"umi"`). +#' Weights are multiplied by `production_rate`. +#' @param method Solver. One of: +#' \describe{ +#' \item{`"fd"`}{Finite-difference sparse linear system (default). Supports +#' exact Dirichlet or Neumann boundary conditions. Recommended for most +#' use cases.} +#' \item{`"green"`}{FFT convolution with the analytic 2-D Green's function +#' \eqn{K_0(\kappa r)}. Ignores domain boundary; fast for parameter +#' sweeps.} +#' \item{`"kde"`}{Gaussian kernel density estimate. Phenomenological +#' baseline; no physical boundary conditions.} +#' } +#' @param grid_resolution Integer. Number of grid cells per axis (the grid is +#' `grid_resolution x grid_resolution`). Default `256L`. +#' @param boundary_condition `"dirichlet"` (concentration zero at boundary) or +#' `"neumann"` (zero normal flux; molecules reflected). Only applies when +#' `method = "fd"`. Default `"dirichlet"`. +#' @param fd_solver `"direct"` (LU factorisation via [Matrix::solve()]) or +#' `"iterative"` (red-black SOR). Direct is exact and fast for +#' `grid_resolution <= 400`; iterative avoids the dense LU at very high +#' resolution. Only applies when `method = "fd"`. Default `"direct"`. +#' @param sor_omega SOR relaxation parameter (0 < omega < 2). Default `1.7`. +#' @param sor_tol Convergence tolerance for SOR. Default `1e-6`. +#' @param sor_max_iter Maximum SOR iterations. Default `1500L`. +#' @param green_r_min_factor Regularisation for the Green's function singularity +#' at r = 0; sets r_min = green_r_min_factor * h. Default `0.5`. +#' @param kde_bandwidth Bandwidth for the KDE smoother. Defaults to `L` when +#' `method = "kde"`. +#' @param include_external Logical. If `TRUE`, transcripts outside `mask` are +#' included as sources. Default `FALSE`. +#' @param normalize Logical. If `TRUE`, rescales the field to `[0, 1]` after +#' solving. Default `FALSE`. +#' @param log_transform Logical. If `TRUE`, applies `log1p()` to the field +#' after solving (and after `clip_negative`). Default `FALSE`. +#' @param clip_negative Logical. If `TRUE`, sets small negative values (solver +#' artefacts) to zero. Default `TRUE`. +#' @param n_cores Integer. Number of cores for mask rasterisation. Parallel +#' rasterisation is available on Unix-like systems only. Default `1L`. +#' @param return_sources Logical. Include the binned source density matrix in +#' the return value. Default `TRUE`. +#' @param plot Logical. If `TRUE` and `ggplot2` is installed, print a quick +#' field plot. Default `FALSE`. +#' @param verbose Logical. Print progress messages. Default `TRUE`. +#' +#' @return A named list with elements: +#' \describe{ +#' \item{`field`}{`N x N` numeric matrix; `NA` outside the mask.} +#' \item{`x`, `y`}{Grid centre coordinates (length `N` each).} +#' \item{`hx`, `hy`}{Grid cell width and height.} +#' \item{`mask`}{`N x N` logical matrix (`TRUE` = inside mask).} +#' \item{`sources`}{`N x N` binned source density (or `NULL` when +#' `return_sources = FALSE`).} +#' \item{`params`}{List of all resolved input parameters.} +#' \item{`diagnostics`}{Timing and cell/transcript counts.} +#' } +#' +#' @examples +#' library(sf) +#' set.seed(1) +#' sq <- st_polygon(list(cbind(c(0,10,10,0,0), c(0,0,10,10,0)))) +#' mask <- st_sfc(sq) +#' tc <- data.frame(x = runif(80, 1, 9), y = runif(80, 1, 9)) +#' res <- estimate_concentration_field(mask, tc, D = 1, lambda = 0.2, +#' grid_resolution = 64L, verbose = FALSE) +#' str(res$field) +#' +#' @importFrom sf st_union st_as_sf st_within st_bbox st_crs +#' @importFrom stats complete.cases +#' @export +estimate_concentration_field <- function( + mask, + transcript_coords, + D = 1.0, + lambda = 0.1, + production_rate = 1.0, + diffusion_length = NULL, + weight_col = NULL, + method = "fd", + grid_resolution = 256L, + boundary_condition = "dirichlet", + fd_solver = "direct", + sor_omega = 1.7, + sor_tol = 1e-6, + sor_max_iter = 1500L, + green_r_min_factor = 0.5, + kde_bandwidth = NULL, + include_external = FALSE, + normalize = FALSE, + log_transform = FALSE, + clip_negative = TRUE, + n_cores = 1L, + return_sources = TRUE, + plot = FALSE, + verbose = TRUE +) { + t0 <- proc.time()["elapsed"] + + # 0. Package checks + req <- "sf" + if (method == "fd" && fd_solver == "direct") req <- c(req, "Matrix") + miss <- req[!sapply(req, requireNamespace, quietly = TRUE)] + if (length(miss) > 0L) + stop("Install required packages: install.packages(c(", + paste0('"', miss, '"', collapse = ", "), "))") + + # 1. Physics + if (!is.null(diffusion_length)) { + L <- as.numeric(diffusion_length) + kappa2 <- 1 / L^2 + kappa <- 1 / L + } else { + if (lambda <= 0 && method != "kde") + stop("`lambda` must be > 0 for a finite steady-state field.") + if (lambda > 0) { + L <- sqrt(D / lambda) + kappa2 <- lambda / D + kappa <- sqrt(kappa2) + } else { + L <- Inf; kappa2 <- 0; kappa <- 0 + } + } + N <- as.integer(grid_resolution) + nx_c <- as.integer(n_cores) + + if (verbose) { + cat("============================================================\n") + cat(" estimate_concentration_field\n") + cat("============================================================\n") + cat(sprintf(" Method: %s | D: %g | lambda: %g | L: %g | N: %d x %d\n", + method, D, lambda, L, N, N)) + if (method == "fd") + cat(sprintf(" BC: %s | solver: %s\n", boundary_condition, fd_solver)) + cat("------------------------------------------------------------\n") + } + + # 2. Parse transcripts + if (is.matrix(transcript_coords)) transcript_coords <- as.data.frame(transcript_coords) + if (!all(c("x", "y") %in% names(transcript_coords))) + names(transcript_coords)[1:2] <- c("x", "y") + tc <- transcript_coords[stats::complete.cases(transcript_coords[, c("x", "y")]), ] + weights <- if (!is.null(weight_col)) + tc[[weight_col]] * production_rate + else + rep(production_rate, nrow(tc)) + + # 3. Filter transcripts by mask + mu_sf <- sf::st_union(mask) + tc_sf <- sf::st_as_sf(tc[, c("x", "y")], coords = c("x", "y"), crs = sf::st_crs(mask)) + in_msk <- as.logical(sf::st_within(tc_sf, mu_sf, sparse = FALSE)[, 1L]) + n_tot <- nrow(tc) + n_ext <- sum(!in_msk) + if (!include_external) { tc <- tc[in_msk, ]; weights <- weights[in_msk] } + n_src <- nrow(tc) + if (n_src == 0L) + stop("No transcripts available. Check coordinates or set include_external = TRUE.") + if (verbose) + cat(sprintf(" Transcripts: %d used (%d external %s)\n", + n_src, n_ext, if (include_external) "included" else "excluded")) + + # 4. Grid + bb <- sf::st_bbox(mu_sf); pad <- 0.03 + xpad <- (bb["xmax"] - bb["xmin"]) * pad + ypad <- (bb["ymax"] - bb["ymin"]) * pad + x_breaks <- seq(bb["xmin"] - xpad, bb["xmax"] + xpad, length.out = N + 1L) + y_breaks <- seq(bb["ymin"] - ypad, bb["ymax"] + ypad, length.out = N + 1L) + x_centers <- (x_breaks[-1L] + x_breaks[-(N + 1L)]) * 0.5 + y_centers <- (y_breaks[-1L] + y_breaks[-(N + 1L)]) * 0.5 + hx <- x_centers[2L] - x_centers[1L] + hy <- y_centers[2L] - y_centers[1L] + h <- (hx + hy) * 0.5 + + # 5. Rasterise mask + if (verbose) cat(sprintf(" Rasterizing %dx%d grid...\n", N, N)) + gdf <- expand.grid(x = x_centers, y = y_centers) + gc_sf <- sf::st_as_sf(gdf, coords = c("x", "y"), crs = sf::st_crs(mask)) + mask_mat <- .rasterize_mask(gc_sf, mu_sf, N, nx_c) + n_inside <- sum(mask_mat) + if (verbose) + cat(sprintf(" Inside-mask cells: %d (%.1f%%)\n", n_inside, 100 * n_inside / N^2)) + + # 6. Bin sources + xi_s <- pmax(1L, pmin(N, findInterval(tc$x, x_breaks, rightmost.closed = TRUE))) + yi_s <- pmax(1L, pmin(N, findInterval(tc$y, y_breaks, rightmost.closed = TRUE))) + lin_s <- xi_s + (yi_s - 1L) * N + src_vec <- numeric(N * N) + wt_agg <- rowsum(matrix(weights, ncol = 1L), lin_s) + src_vec[as.integer(rownames(wt_agg))] <- wt_agg[, 1L] + source_mat <- matrix(src_vec, nrow = N, ncol = N) + + # 7. Solve + t_solve <- proc.time()["elapsed"] + + field_mat <- switch(method, + + "fd" = { + inside_lin <- which(mask_mat); K <- length(inside_lin) + cell2sys <- integer(N * N); cell2sys[inside_lin] <- seq_len(K) + xi_in <- ((inside_lin - 1L) %% N) + 1L + yi_in <- ((inside_lin - 1L) %/% N) + 1L + nbr_xm <- ifelse(xi_in > 1L, inside_lin - 1L, 0L) + nbr_xp <- ifelse(xi_in < N, inside_lin + 1L, 0L) + nbr_ym <- ifelse(yi_in > 1L, inside_lin - N, 0L) + nbr_yp <- ifelse(yi_in < N, inside_lin + N, 0L) + nx_w <- 1 / hx^2; ny_w <- 1 / hy^2 + is_in_mask <- function(nb) { + ok <- nb > 0L & nb <= N * N + res <- logical(length(nb)) + res[ok] <- mask_mat[nb[ok]] + res + } + nbr_list <- list(nbr_xm, nbr_xp, nbr_ym, nbr_yp) + wt_list <- c(nx_w, nx_w, ny_w, ny_w) + + if (fd_solver == "direct") { + diag_vals <- rep(-(2 * nx_w + 2 * ny_w + kappa2), K) + off_rows <- integer(0); off_cols <- integer(0); off_vals <- numeric(0) + for (d in seq_along(nbr_list)) { + nb <- nbr_list[[d]]; wt <- wt_list[[d]] + is_in <- is_in_mask(nb) + is_out <- (nb > 0L) & !is_in + is_oob <- (nb == 0L) + sel <- which(is_in) + if (length(sel) > 0L) { + off_rows <- c(off_rows, sel) + off_cols <- c(off_cols, cell2sys[nb[sel]]) + off_vals <- c(off_vals, rep(wt, length(sel))) + } + if (boundary_condition == "neumann") { + nc2 <- which(is_out | is_oob) + diag_vals[nc2] <- diag_vals[nc2] + wt + } + } + A <- Matrix::sparseMatrix( + i = c(seq_len(K), off_rows), + j = c(seq_len(K), off_cols), + x = c(diag_vals, off_vals), + dims = c(K, K) + ) + b_vec <- -source_mat[inside_lin] / (D * hx * hy) + if (verbose) + cat(sprintf(" Sparse system: %dx%d, %d nnz\n", K, K, length(A@x))) + c_vec <- as.numeric(Matrix::solve(A, b_vec)) + out <- matrix(NA_real_, N, N); out[inside_lin] <- c_vec; out + + } else { + # Iterative SOR + if (boundary_condition == "neumann") + message("SOR solver uses Dirichlet BC only; ignoring 'neumann'.") + xi_grid <- matrix(rep(seq_len(N), N), N, N) + yi_grid <- matrix(rep(seq_len(N), each = N), N, N) + is_red <- ((xi_grid + yi_grid) %% 2L) == 0L + ins_red <- mask_mat & is_red + ins_blk <- mask_mat & !is_red + C <- matrix(0, N, N) + denom <- 2 * nx_w + 2 * ny_w + kappa2 + src_dens <- source_mat / (D * hx * hy) + converged <- FALSE + for (iter in seq_len(sor_max_iter)) { + C_prev <- C + Cxm <- rbind(matrix(0, 1, N), C[1:(N - 1), ]) + Cxp <- rbind(C[2:N, ], matrix(0, 1, N)) + Cym <- cbind(matrix(0, N, 1), C[, 1:(N - 1)]) + Cyp <- cbind(C[, 2:N], matrix(0, N, 1)) + Cgs <- (nx_w * (Cxm + Cxp) + ny_w * (Cym + Cyp) + src_dens) / denom + C[ins_red] <- (1 - sor_omega) * C[ins_red] + sor_omega * Cgs[ins_red] + C[!mask_mat] <- 0 + Cxm <- rbind(matrix(0, 1, N), C[1:(N - 1), ]) + Cxp <- rbind(C[2:N, ], matrix(0, 1, N)) + Cym <- cbind(matrix(0, N, 1), C[, 1:(N - 1)]) + Cyp <- cbind(C[, 2:N], matrix(0, N, 1)) + Cgs <- (nx_w * (Cxm + Cxp) + ny_w * (Cym + Cyp) + src_dens) / denom + C[ins_blk] <- (1 - sor_omega) * C[ins_blk] + sor_omega * Cgs[ins_blk] + C[!mask_mat] <- 0 + delta <- max(abs(C[mask_mat] - C_prev[mask_mat])) + if (is.finite(delta) && delta < sor_tol) { + if (verbose) + cat(sprintf(" SOR converged: iter=%d, delta=%.2e\n", iter, delta)) + converged <- TRUE + break + } + } + if (!converged) + warning("SOR did not converge in ", sor_max_iter, " iterations.") + out <- matrix(NA_real_, N, N); out[mask_mat] <- C[mask_mat]; out + } + }, + + "green" = { + if (kappa <= 0) stop("method = 'green' requires lambda > 0.") + Np <- 2L * N + xi_k <- c(0L:(N - 1L), (-N):(-1L)) + yi_k <- c(0L:(N - 1L), (-N):(-1L)) + dx_mat <- outer(xi_k, rep(1L, Np)) * hx + dy_mat <- outer(rep(1L, Np), yi_k) * hy + r_mat <- sqrt(dx_mat^2 + dy_mat^2) + r_min <- green_r_min_factor * h + r_mat[r_mat < r_min] <- r_min + G_mat <- besselK(kappa * r_mat, nu = 0) * (hx * hy) / (2 * pi * D) + G_fft <- stats::fft(G_mat) + S_pad <- matrix(0, Np, Np) + S_pad[1L:N, 1L:N] <- source_mat / (hx * hy) + C_conv <- Re(stats::fft(G_fft * stats::fft(S_pad), inverse = TRUE)) / Np^2 + out <- matrix(NA_real_, N, N) + out[mask_mat] <- C_conv[1L:N, 1L:N][mask_mat] + out + }, + + "kde" = { + bw <- if (!is.null(kde_bandwidth)) as.numeric(kde_bandwidth) else + if (is.finite(L)) L else + min(diff(range(x_centers)), diff(range(y_centers))) * 0.05 + if (verbose) cat(sprintf(" KDE bandwidth: %g\n", bw)) + sm <- .sep_gauss_smooth_2d(source_mat, bw / hx, bw / hy) + out <- matrix(NA_real_, N, N); out[mask_mat] <- sm[mask_mat]; out + }, + + stop("Unknown method '", method, "'. Choose one of: fd, green, kde.") + ) + + t_solve_end <- proc.time()["elapsed"] + + # 8. Post-processing + field_mat[!mask_mat] <- NA_real_ + if (clip_negative) field_mat[!is.na(field_mat) & field_mat < 0] <- 0 + if (log_transform) field_mat <- log1p(field_mat) + if (normalize) { + fmn <- min(field_mat, na.rm = TRUE); fmx <- max(field_mat, na.rm = TRUE) + if (fmx > fmn) field_mat <- (field_mat - fmn) / (fmx - fmn) + } + + t_total <- proc.time()["elapsed"] - t0 + if (verbose) { + cat(sprintf(" Solve: %.2fs | Total: %.2fs | Range: [%.4g, %.4g]\n", + t_solve_end - t_solve, t_total, + min(field_mat, na.rm = TRUE), max(field_mat, na.rm = TRUE))) + cat("============================================================\n") + } + + # 9. Optional quick plot + if (plot && requireNamespace("ggplot2", quietly = TRUE)) { + gdf_p <- expand.grid(x = x_centers, y = y_centers) + gdf_p$field <- as.vector(field_mat) + gdf_p <- gdf_p[!is.na(gdf_p$field), ] + print( + ggplot2::ggplot(gdf_p, ggplot2::aes(x = x, y = y, fill = field)) + + ggplot2::geom_raster(interpolate = TRUE) + + ggplot2::scale_fill_viridis_c(option = "magma", name = "Conc.", + na.value = "transparent") + + ggplot2::coord_equal() + ggplot2::theme_minimal(base_size = 11) + + ggplot2::labs(title = paste0("Concentration | method = '", method, "'"), + x = "X", y = "Y") + ) + } + + # 10. Return + list( + field = field_mat, + x = x_centers, y = y_centers, hx = hx, hy = hy, + mask = mask_mat, + sources = if (return_sources) source_mat else NULL, + params = list( + D = D, + lambda = lambda, + kappa2 = kappa2, + diffusion_length = L, + production_rate = production_rate, + method = method, + grid_resolution = N, + boundary_condition = if (method == "fd") boundary_condition else NA, + fd_solver = if (method == "fd") fd_solver else NA, + include_external = include_external, + log_transform = log_transform, + normalize = normalize + ), + diagnostics = list( + n_transcripts_total = n_tot, + n_transcripts_used = n_src, + n_transcripts_external = n_ext, + n_cells_inside_mask = n_inside, + elapsed_total_s = t_total, + elapsed_solve_s = t_solve_end - t_solve + ) + ) +} + +# ============================================================ +# UTILITY FUNCTIONS +# ============================================================ + +#' Convert a concentration field result to a tidy data frame +#' +#' Expands the `N x N` field matrix into a long `data.frame` suitable for +#' plotting with **ggplot2** or further analysis. +#' +#' @param result A list returned by [estimate_concentration_field()]. +#' @param inside_only Logical. If `TRUE` (default), only rows inside the mask +#' (i.e. non-`NA` field values) are returned. +#' +#' @return A `data.frame` with columns `x`, `y`, `field`, `inside`, and +#' optionally `source`. +#' +#' @examples +#' library(sf) +#' set.seed(1) +#' sq <- st_polygon(list(cbind(c(0,10,10,0,0), c(0,0,10,10,0)))) +#' mask <- st_sfc(sq) +#' tc <- data.frame(x = runif(50, 1, 9), y = runif(50, 1, 9)) +#' res <- estimate_concentration_field(mask, tc, grid_resolution = 64L, +#' verbose = FALSE) +#' df <- field_to_df(res) +#' head(df) +#' +#' @export +field_to_df <- function(result, inside_only = TRUE) { + df <- expand.grid(x = result$x, y = result$y) + df$field <- as.vector(result$field) + df$inside <- as.vector(result$mask) + if (!is.null(result$sources)) df$source <- as.vector(result$sources) + if (inside_only) df <- df[df$inside, ] + df +} + +#' Plot a concentration field +#' +#' Produces a **ggplot2** raster visualisation of a field returned by +#' [estimate_concentration_field()]. Optionally overlays transcript point +#' locations and iso-concentration contour lines. +#' +#' @param result A list returned by [estimate_concentration_field()]. +#' @param transcript_coords Optional `data.frame` with columns `x` and `y` to +#' overlay as points. +#' @param show_sources Logical. If `TRUE` and `transcript_coords` is supplied, +#' overlay points. Default `TRUE`. +#' @param show_contours Logical. Overlay iso-concentration contours. Default +#' `FALSE`. +#' @param n_contours Integer. Number of contour levels. Default `8L`. +#' @param palette Viridis palette name (`"magma"`, `"viridis"`, etc.). Default +#' `"magma"`. +#' @param interpolate Logical. Use bilinear interpolation in `geom_raster`. +#' Default `TRUE`. +#' @param log_scale Logical. Plot `log1p(field)` instead of raw field. Default +#' `FALSE`. +#' @param pt_size Point size for transcript overlay. Default `0.4`. +#' @param pt_alpha Point transparency. Default `0.4`. +#' @param pt_color Point colour. Default `"#00e5ff"`. +#' @param title Optional plot title; auto-generated from params if `NULL`. +#' +#' @return A `ggplot` object. +#' +#' @examples +#' library(sf) +#' set.seed(1) +#' sq <- st_polygon(list(cbind(c(0,10,10,0,0), c(0,0,10,10,0)))) +#' mask <- st_sfc(sq) +#' tc <- data.frame(x = runif(80, 1, 9), y = runif(80, 1, 9)) +#' res <- estimate_concentration_field(mask, tc, grid_resolution = 64L, +#' verbose = FALSE) +#' if (requireNamespace("ggplot2", quietly = TRUE)) +#' plot_concentration_field(res, tc) +#' +#' @export +plot_concentration_field <- function( + result, transcript_coords = NULL, + show_sources = TRUE, show_contours = FALSE, n_contours = 8L, + palette = "magma", interpolate = TRUE, log_scale = FALSE, + pt_size = 0.4, pt_alpha = 0.4, pt_color = "#00e5ff", title = NULL) { + if (!requireNamespace("ggplot2", quietly = TRUE)) + stop("plot_concentration_field() requires ggplot2. Install it with install.packages('ggplot2').") + df <- field_to_df(result, inside_only = TRUE) + p <- result$params + L_str <- if (is.finite(p$diffusion_length)) round(p$diffusion_length, 3) else "Inf" + tit <- if (is.null(title)) + sprintf("Concentration | %s | D=%g, lambda=%g, L=%s", + p$method, p$D, p$lambda, L_str) + else title + fc <- if (log_scale) { df$lf <- log1p(df$field); "lf" } else "field" + g <- ggplot2::ggplot(df, ggplot2::aes(x = x, y = y, fill = .data[[fc]])) + + ggplot2::geom_raster(interpolate = interpolate) + + ggplot2::scale_fill_viridis_c( + option = palette, + name = if (log_scale) "log1p(C)" else "Conc.", + na.value = "transparent" + ) + + ggplot2::coord_equal() + + ggplot2::labs(title = tit, x = "X", y = "Y") + + ggplot2::theme_minimal(base_size = 11) + + ggplot2::theme(plot.title = ggplot2::element_text(face = "bold")) + if (show_contours) + g <- g + ggplot2::geom_contour( + data = df, + ggplot2::aes(z = field), + color = "white", alpha = 0.4, linewidth = 0.3, bins = n_contours + ) + if (show_sources && !is.null(transcript_coords)) + g <- g + ggplot2::geom_point( + data = transcript_coords, + ggplot2::aes(x = x, y = y), + inherit.aes = FALSE, color = pt_color, + size = pt_size, alpha = pt_alpha + ) + g +} + +#' Sweep over diffusion length values +#' +#' Convenience wrapper that calls [estimate_concentration_field()] once per +#' value in `L_values`, returning a combined tidy `data.frame` with a column +#' `L` identifying each run. Useful for visualising how the diffusion length +#' changes the field. +#' +#' @param L_values Numeric vector of diffusion lengths to sweep over. +#' @param mask An `sfc` polygon mask (see [estimate_concentration_field()]). +#' @param transcript_coords Transcript coordinates (see +#' [estimate_concentration_field()]). +#' @param verbose Logical. If `FALSE` (default), suppress per-run output. +#' @param ... Additional arguments passed to [estimate_concentration_field()] +#' (e.g. `method`, `grid_resolution`, `D`). Do not pass `diffusion_length` +#' or `verbose` here; use the dedicated arguments above. +#' +#' @return A `data.frame` combining [field_to_df()] output for every value in +#' `L_values`, with an added column `L`. +#' +#' @examples +#' library(sf) +#' set.seed(1) +#' sq <- st_polygon(list(cbind(c(0,10,10,0,0), c(0,0,10,10,0)))) +#' mask <- st_sfc(sq) +#' tc <- data.frame(x = runif(60, 1, 9), y = runif(60, 1, 9)) +#' sw <- sweep_diffusion_length(c(1, 3, 8), mask, tc, +#' grid_resolution = 64L) +#' unique(sw$L) +#' +#' @export +sweep_diffusion_length <- function(L_values, mask, transcript_coords, + verbose = FALSE, ...) { + do.call(rbind, lapply(L_values, function(Lv) { + res <- estimate_concentration_field( + mask = mask, + transcript_coords = transcript_coords, + diffusion_length = Lv, + verbose = verbose, + ... + ) + df <- field_to_df(res, inside_only = TRUE) + df$L <- Lv + df + })) +} diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..4e7c2ad --- /dev/null +++ b/R/utils.R @@ -0,0 +1,48 @@ +# Internal helpers for TissueField +# These functions are not exported and do not generate .Rd pages. + +#' @noRd +.gauss_kernel_1d <- function(sigma, n_max = NULL) { + r <- max(1L, ceiling(3 * sigma)) + if (!is.null(n_max)) r <- min(r, floor((n_max - 1L) / 2L)) + k <- exp(-(seq(-r, r))^2 / (2 * sigma^2)) + k / sum(k) +} + +#' @noRd +#' @importFrom stats filter +.sep_gauss_smooth_2d <- function(mat, sigma_x, sigma_y) { + nr <- nrow(mat); nc <- ncol(mat) + kx <- .gauss_kernel_1d(sigma_x, n_max = nr) + ky <- .gauss_kernel_1d(sigma_y, n_max = nc) + out <- apply(mat, 2L, function(col) { + s <- stats::filter(col, kx, method = "convolution", sides = 2L) + s[is.na(s)] <- 0 + as.numeric(s) + }) + t(apply(out, 1L, function(row) { + s <- stats::filter(row, ky, method = "convolution", sides = 2L) + s[is.na(s)] <- 0 + as.numeric(s) + })) +} + +#' @noRd +#' @importFrom sf st_within +.rasterize_mask <- function(gc_sf, mask_union, N, n_cores) { + n_cells <- N * N + if (n_cores > 1L && .Platform$OS.type == "unix") { + chunk_size <- ceiling(n_cells / n_cores) + chunks <- split(seq_len(n_cells), ceiling(seq_len(n_cells) / chunk_size)) + res_list <- parallel::mclapply( + chunks, + function(idx) + as.logical(sf::st_within(gc_sf[idx, ], mask_union, sparse = FALSE)[, 1L]), + mc.cores = n_cores + ) + in_msk <- unlist(res_list) + } else { + in_msk <- as.logical(sf::st_within(gc_sf, mask_union, sparse = FALSE)[, 1L]) + } + matrix(in_msk, nrow = N, ncol = N) +} diff --git a/README.md b/README.md index 37597a7..bf9121b 100644 --- a/README.md +++ b/README.md @@ -1 +1,105 @@ -# TissueField \ No newline at end of file +# TissueField + +**TissueField** computes continuous steady-state molecular concentration fields from discrete mRNA +transcript or protein detection coordinates in spatial transcriptomics and spatial proteomics +experiments. + +## Physical model + +The package solves the **screened Poisson (diffusion-clearance) PDE**: + +``` +D * nabla^2 C(x) - lambda * C(x) + s(x) = 0 +``` + +The key parameter is the **diffusion length** `L = sqrt(D / lambda)`, which sets the characteristic +decay distance from each source. Small `L` gives tight, cluster-specific fields; large `L` fills +the entire tissue. + +## Solvers + +| Method | Algorithm | Boundary conditions | Best for | +|--------|-----------|---------------------|----------| +| `"fd"` | Finite-difference sparse linear system | Exact Dirichlet or Neumann | Default; recommended | +| `"green"` | FFT convolution with analytic Green's function | None (infinite domain) | Parameter sweeps, large grids | +| `"kde"` | Gaussian kernel density | None | Quick visualisation baseline | + +## Installation + +```r +# Install from GitHub +remotes::install_github("RaredonLab/TissueField") + +# Required: sf and Matrix +install.packages(c("sf", "Matrix")) + +# Recommended companion package for mask fitting +remotes::install_github("RaredonLab/TissueMask") +``` + +## Quick start + +```r +library(TissueField) +library(sf) + +# 1. Create (or load) an sfc polygon mask +# e.g. from TissueMask::fit_spatial_mask() +sq <- st_polygon(list(cbind(c(0,10,10,0,0), c(0,0,10,10,0)))) +mask <- st_sfc(sq) + +# 2. Provide transcript coordinates +set.seed(1) +tc <- data.frame(x = runif(200, 1, 9), y = runif(200, 1, 9)) + +# 3. Estimate the concentration field +res <- estimate_concentration_field( + mask = mask, + transcript_coords = tc, + diffusion_length = 2, # L = 2 spatial units + method = "fd", # finite-difference solver + grid_resolution = 128L, + verbose = TRUE +) + +# 4. Visualise +plot_concentration_field(res, tc, show_contours = TRUE) + +# 5. Get a tidy data frame for custom plotting +df <- field_to_df(res) +head(df) + +# 6. Sweep over multiple diffusion lengths +sw <- sweep_diffusion_length(c(0.5, 1, 3, 8), mask, tc, + grid_resolution = 64L) +``` + +## Key parameters + +| Parameter | Controls | Typical values | +|-----------|----------|----------------| +| `diffusion_length` | Spatial scale of field | 0.5 -- 20 (in coordinate units) | +| `D` | Amplitude (shape fixed by L) | 1.0 (leave at 1 unless you have physical units) | +| `lambda` | Clearance rate | Derived from L and D | +| `weight_col` | Per-transcript UMI weighting | e.g. `"umi"` | +| `boundary_condition` | Absorbing vs reflecting edge | `"dirichlet"` (default) | +| `grid_resolution` | Grid cells per axis | 128--512 | + +## Vignettes + +- **Getting started**: basic usage, `field_to_df()`, `plot_concentration_field()`, diffusion length intuition +- **Solver guide**: fd vs green vs kde, Dirichlet vs Neumann BCs, direct vs iterative +- **Parameter tuning**: L sweep, D vs lambda, UMI weighting, grid resolution, log-transform +- **Complete reference**: 12-section deep dive covering all features + +## Citation + +If you use TissueField in your research, please cite: + +> Raredon Laboratory (2026). *TissueField: Steady-State Molecular Concentration Fields for +> Spatial Transcriptomics.* R package version 0.1.0. +> https://github.com/RaredonLab/TissueField + +## License + +MIT (c) 2026 Raredon Laboratory diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..48b8c8f --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,67 @@ +url: https://raredonlab.github.io/TissueField/ + +template: + bootstrap: 5 + bootswatch: flatly + bslib: + primary: "#1a5fa8" + link-color: "#1a5fa8" + +home: + title: "TissueField — Molecular Concentration Fields for Spatial Transcriptomics" + description: > + Computes steady-state molecular concentration fields from discrete mRNA + transcript or protein detection coordinates using a physically motivated + diffusion-clearance model. Supports finite-difference, Green's function, + and kernel density estimation solvers with exact boundary conditions. + links: + - text: "RaredonLab on GitHub" + href: https://github.com/RaredonLab + - text: "TissueMask (companion package)" + href: https://github.com/RaredonLab/TissueMask + +navbar: + structure: + left: [intro, reference, articles, news] + right: [github] + components: + github: + icon: fab fa-github fa-lg + href: https://github.com/RaredonLab/TissueField + aria-label: GitHub + +reference: + - title: "Core function" + desc: > + The primary exported function for estimating concentration fields. + contents: + - estimate_concentration_field + - title: "Utilities" + desc: Helper functions for post-processing and visualisation. + contents: + - field_to_df + - plot_concentration_field + - sweep_diffusion_length + - title: "Package" + desc: Package-level documentation. + contents: + - TissueField-package + +articles: + - title: "Vignettes" + navbar: Vignettes + contents: + - getting-started + - solver-guide + - parameter-tuning + - complete-reference + +news: + releases: + - text: "TissueField 0.1.0" + href: https://github.com/RaredonLab/TissueField/releases/tag/v0.1.0 + +footer: + structure: + left: developed_by + right: built_with diff --git a/man/TissueField-package.Rd b/man/TissueField-package.Rd new file mode 100644 index 0000000..34186bc --- /dev/null +++ b/man/TissueField-package.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TissueField-package.R +\docType{package} +\name{TissueField-package} +\alias{TissueField} +\alias{TissueField-package} +\title{TissueField: Steady-State Molecular Concentration Fields for Spatial Transcriptomics} +\description{ +Computes continuous steady-state molecular concentration fields from discrete +mRNA transcript or protein detection coordinates using a physically motivated +diffusion-clearance model. The screened Poisson PDE is solved numerically +using one of three methods: finite-difference sparse linear system (\code{"fd"}), +Green's function FFT convolution (\code{"green"}), or Gaussian kernel density +(\code{"kde"}). +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/RaredonLab/TissueField} + \item Report bugs at \url{https://github.com/RaredonLab/TissueField/issues} +} + +} +\author{ +\strong{Maintainer}: Raredon Laboratory \email{raredon@yale.edu} + +} +\keyword{internal} diff --git a/man/estimate_concentration_field.Rd b/man/estimate_concentration_field.Rd new file mode 100644 index 0000000..9c9a88e --- /dev/null +++ b/man/estimate_concentration_field.Rd @@ -0,0 +1,157 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/estimate_concentration_field.R +\name{estimate_concentration_field} +\alias{estimate_concentration_field} +\title{Estimate a steady-state molecular concentration field} +\usage{ +estimate_concentration_field( + mask, + transcript_coords, + D = 1, + lambda = 0.1, + production_rate = 1, + diffusion_length = NULL, + weight_col = NULL, + method = "fd", + grid_resolution = 256L, + boundary_condition = "dirichlet", + fd_solver = "direct", + sor_omega = 1.7, + sor_tol = 1e-06, + sor_max_iter = 1500L, + green_r_min_factor = 0.5, + kde_bandwidth = NULL, + include_external = FALSE, + normalize = FALSE, + log_transform = FALSE, + clip_negative = TRUE, + n_cores = 1L, + return_sources = TRUE, + plot = FALSE, + verbose = TRUE +) +} +\arguments{ +\item{mask}{An \code{sfc} or \code{sf} polygon object defining the tissue boundary. +Typically the output of \code{TissueMask::fit_spatial_mask()}. Multi-polygon +objects (holes, islands) are supported.} + +\item{transcript_coords}{A \code{data.frame} or matrix with columns \code{x} and \code{y} +(or first two columns used if names differ) giving transcript or detection +coordinates in the same coordinate system as \code{mask}.} + +\item{D}{Diffusion coefficient (arbitrary spatial units squared per time). +Controls the amplitude of the field (\eqn{C \propto 1/D}); the spatial +pattern depends only on \code{diffusion_length}. Default \code{1.0}.} + +\item{lambda}{First-order clearance rate. Must be \verb{> 0} unless +\code{diffusion_length} is supplied. Default \code{0.1}.} + +\item{production_rate}{Scalar multiplier applied to all source weights. +If \code{weight_col} is \code{NULL} each transcript contributes \code{production_rate} +units of source strength. Default \code{1.0}.} + +\item{diffusion_length}{Optional. If supplied, overrides \code{D} and \code{lambda} +and sets \eqn{L} directly. Useful for sweeping the spatial scale without +changing amplitude.} + +\item{weight_col}{Optional character string naming a column in +\code{transcript_coords} to use as per-transcript weights (e.g. \code{"umi"}). +Weights are multiplied by \code{production_rate}.} + +\item{method}{Solver. One of: +\describe{ +\item{\code{"fd"}}{Finite-difference sparse linear system (default). Supports +exact Dirichlet or Neumann boundary conditions. Recommended for most +use cases.} +\item{\code{"green"}}{FFT convolution with the analytic 2-D Green's function +\eqn{K_0(\kappa r)}. Ignores domain boundary; fast for parameter +sweeps.} +\item{\code{"kde"}}{Gaussian kernel density estimate. Phenomenological +baseline; no physical boundary conditions.} +}} + +\item{grid_resolution}{Integer. Number of grid cells per axis (the grid is +\verb{grid_resolution x grid_resolution}). Default \code{256L}.} + +\item{boundary_condition}{\code{"dirichlet"} (concentration zero at boundary) or +\code{"neumann"} (zero normal flux; molecules reflected). Only applies when +\code{method = "fd"}. Default \code{"dirichlet"}.} + +\item{fd_solver}{\code{"direct"} (LU factorisation via \code{\link[Matrix:solve-methods]{Matrix::solve()}}) or +\code{"iterative"} (red-black SOR). Direct is exact and fast for +\code{grid_resolution <= 400}; iterative avoids the dense LU at very high +resolution. Only applies when \code{method = "fd"}. Default \code{"direct"}.} + +\item{sor_omega}{SOR relaxation parameter (0 < omega < 2). Default \code{1.7}.} + +\item{sor_tol}{Convergence tolerance for SOR. Default \code{1e-6}.} + +\item{sor_max_iter}{Maximum SOR iterations. Default \code{1500L}.} + +\item{green_r_min_factor}{Regularisation for the Green's function singularity +at r = 0; sets r_min = green_r_min_factor * h. Default \code{0.5}.} + +\item{kde_bandwidth}{Bandwidth for the KDE smoother. Defaults to \code{L} when +\code{method = "kde"}.} + +\item{include_external}{Logical. If \code{TRUE}, transcripts outside \code{mask} are +included as sources. Default \code{FALSE}.} + +\item{normalize}{Logical. If \code{TRUE}, rescales the field to \verb{[0, 1]} after +solving. Default \code{FALSE}.} + +\item{log_transform}{Logical. If \code{TRUE}, applies \code{log1p()} to the field +after solving (and after \code{clip_negative}). Default \code{FALSE}.} + +\item{clip_negative}{Logical. If \code{TRUE}, sets small negative values (solver +artefacts) to zero. Default \code{TRUE}.} + +\item{n_cores}{Integer. Number of cores for mask rasterisation. Parallel +rasterisation is available on Unix-like systems only. Default \code{1L}.} + +\item{return_sources}{Logical. Include the binned source density matrix in +the return value. Default \code{TRUE}.} + +\item{plot}{Logical. If \code{TRUE} and \code{ggplot2} is installed, print a quick +field plot. Default \code{FALSE}.} + +\item{verbose}{Logical. Print progress messages. Default \code{TRUE}.} +} +\value{ +A named list with elements: +\describe{ +\item{\code{field}}{\verb{N x N} numeric matrix; \code{NA} outside the mask.} +\item{\code{x}, \code{y}}{Grid centre coordinates (length \code{N} each).} +\item{\code{hx}, \code{hy}}{Grid cell width and height.} +\item{\code{mask}}{\verb{N x N} logical matrix (\code{TRUE} = inside mask).} +\item{\code{sources}}{\verb{N x N} binned source density (or \code{NULL} when +\code{return_sources = FALSE}).} +\item{\code{params}}{List of all resolved input parameters.} +\item{\code{diagnostics}}{Timing and cell/transcript counts.} +} +} +\description{ +Solves the screened Poisson (diffusion-clearance) PDE over a spatial mask +to produce a continuous concentration field from discrete transcript or +protein detection coordinates: +} +\details{ +\deqn{D \nabla^2 C(\mathbf{x}) - \lambda C(\mathbf{x}) + s(\mathbf{x}) = 0} + +The key derived quantity is the \strong{diffusion length} \eqn{L = \sqrt{D/\lambda}}, +which sets the characteristic decay distance from each source. Three solvers +are available: \code{"fd"} (finite-difference, recommended), \code{"green"} (Green's +function via FFT), and \code{"kde"} (Gaussian kernel density, phenomenological). +} +\examples{ +library(sf) +set.seed(1) +sq <- st_polygon(list(cbind(c(0,10,10,0,0), c(0,0,10,10,0)))) +mask <- st_sfc(sq) +tc <- data.frame(x = runif(80, 1, 9), y = runif(80, 1, 9)) +res <- estimate_concentration_field(mask, tc, D = 1, lambda = 0.2, + grid_resolution = 64L, verbose = FALSE) +str(res$field) + +} diff --git a/man/field_to_df.Rd b/man/field_to_df.Rd new file mode 100644 index 0000000..16c3d09 --- /dev/null +++ b/man/field_to_df.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/estimate_concentration_field.R +\name{field_to_df} +\alias{field_to_df} +\title{Convert a concentration field result to a tidy data frame} +\usage{ +field_to_df(result, inside_only = TRUE) +} +\arguments{ +\item{result}{A list returned by \code{\link[=estimate_concentration_field]{estimate_concentration_field()}}.} + +\item{inside_only}{Logical. If \code{TRUE} (default), only rows inside the mask +(i.e. non-\code{NA} field values) are returned.} +} +\value{ +A \code{data.frame} with columns \code{x}, \code{y}, \code{field}, \code{inside}, and +optionally \code{source}. +} +\description{ +Expands the \verb{N x N} field matrix into a long \code{data.frame} suitable for +plotting with \strong{ggplot2} or further analysis. +} +\examples{ +library(sf) +set.seed(1) +sq <- st_polygon(list(cbind(c(0,10,10,0,0), c(0,0,10,10,0)))) +mask <- st_sfc(sq) +tc <- data.frame(x = runif(50, 1, 9), y = runif(50, 1, 9)) +res <- estimate_concentration_field(mask, tc, grid_resolution = 64L, + verbose = FALSE) +df <- field_to_df(res) +head(df) + +} diff --git a/man/plot_concentration_field.Rd b/man/plot_concentration_field.Rd new file mode 100644 index 0000000..9de88a3 --- /dev/null +++ b/man/plot_concentration_field.Rd @@ -0,0 +1,72 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/estimate_concentration_field.R +\name{plot_concentration_field} +\alias{plot_concentration_field} +\title{Plot a concentration field} +\usage{ +plot_concentration_field( + result, + transcript_coords = NULL, + show_sources = TRUE, + show_contours = FALSE, + n_contours = 8L, + palette = "magma", + interpolate = TRUE, + log_scale = FALSE, + pt_size = 0.4, + pt_alpha = 0.4, + pt_color = "#00e5ff", + title = NULL +) +} +\arguments{ +\item{result}{A list returned by \code{\link[=estimate_concentration_field]{estimate_concentration_field()}}.} + +\item{transcript_coords}{Optional \code{data.frame} with columns \code{x} and \code{y} to +overlay as points.} + +\item{show_sources}{Logical. If \code{TRUE} and \code{transcript_coords} is supplied, +overlay points. Default \code{TRUE}.} + +\item{show_contours}{Logical. Overlay iso-concentration contours. Default +\code{FALSE}.} + +\item{n_contours}{Integer. Number of contour levels. Default \code{8L}.} + +\item{palette}{Viridis palette name (\code{"magma"}, \code{"viridis"}, etc.). Default +\code{"magma"}.} + +\item{interpolate}{Logical. Use bilinear interpolation in \code{geom_raster}. +Default \code{TRUE}.} + +\item{log_scale}{Logical. Plot \code{log1p(field)} instead of raw field. Default +\code{FALSE}.} + +\item{pt_size}{Point size for transcript overlay. Default \code{0.4}.} + +\item{pt_alpha}{Point transparency. Default \code{0.4}.} + +\item{pt_color}{Point colour. Default \code{"#00e5ff"}.} + +\item{title}{Optional plot title; auto-generated from params if \code{NULL}.} +} +\value{ +A \code{ggplot} object. +} +\description{ +Produces a \strong{ggplot2} raster visualisation of a field returned by +\code{\link[=estimate_concentration_field]{estimate_concentration_field()}}. Optionally overlays transcript point +locations and iso-concentration contour lines. +} +\examples{ +library(sf) +set.seed(1) +sq <- st_polygon(list(cbind(c(0,10,10,0,0), c(0,0,10,10,0)))) +mask <- st_sfc(sq) +tc <- data.frame(x = runif(80, 1, 9), y = runif(80, 1, 9)) +res <- estimate_concentration_field(mask, tc, grid_resolution = 64L, + verbose = FALSE) +if (requireNamespace("ggplot2", quietly = TRUE)) + plot_concentration_field(res, tc) + +} diff --git a/man/sweep_diffusion_length.Rd b/man/sweep_diffusion_length.Rd new file mode 100644 index 0000000..6fb7674 --- /dev/null +++ b/man/sweep_diffusion_length.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/estimate_concentration_field.R +\name{sweep_diffusion_length} +\alias{sweep_diffusion_length} +\title{Sweep over diffusion length values} +\usage{ +sweep_diffusion_length(L_values, mask, transcript_coords, verbose = FALSE, ...) +} +\arguments{ +\item{L_values}{Numeric vector of diffusion lengths to sweep over.} + +\item{mask}{An \code{sfc} polygon mask (see \code{\link[=estimate_concentration_field]{estimate_concentration_field()}}).} + +\item{transcript_coords}{Transcript coordinates (see +\code{\link[=estimate_concentration_field]{estimate_concentration_field()}}).} + +\item{verbose}{Logical. If \code{FALSE} (default), suppress per-run output.} + +\item{...}{Additional arguments passed to \code{\link[=estimate_concentration_field]{estimate_concentration_field()}} +(e.g. \code{method}, \code{grid_resolution}, \code{D}). Do not pass \code{diffusion_length} +or \code{verbose} here; use the dedicated arguments above.} +} +\value{ +A \code{data.frame} combining \code{\link[=field_to_df]{field_to_df()}} output for every value in +\code{L_values}, with an added column \code{L}. +} +\description{ +Convenience wrapper that calls \code{\link[=estimate_concentration_field]{estimate_concentration_field()}} once per +value in \code{L_values}, returning a combined tidy \code{data.frame} with a column +\code{L} identifying each run. Useful for visualising how the diffusion length +changes the field. +} +\examples{ +library(sf) +set.seed(1) +sq <- st_polygon(list(cbind(c(0,10,10,0,0), c(0,0,10,10,0)))) +mask <- st_sfc(sq) +tc <- data.frame(x = runif(60, 1, 9), y = runif(60, 1, 9)) +sw <- sweep_diffusion_length(c(1, 3, 8), mask, tc, + grid_resolution = 64L) +unique(sw$L) + +} diff --git a/spatial-modeling-with-claude/application-to-xenium-msbr-2026-04-03.R b/spatial-modeling-with-claude/application-to-xenium-msbr-2026-04-03.R new file mode 100644 index 0000000..c6b6ed5 --- /dev/null +++ b/spatial-modeling-with-claude/application-to-xenium-msbr-2026-04-03.R @@ -0,0 +1,200 @@ +# ---- set wd ---- +setwd("~/Desktop/spatial-modeling-with-claude") + +# ---- set up ---- +require(Seurat) +require(SeuratObject) +library(sf) +library(ggplot2) +library(patchwork) +source("fit_spatial_mask.R") +source("estimate_concentration_field.R") +set.seed(2024) + +# ---- shared theme ---- +theme_demo <- function(bs=9.5) + theme_minimal(base_size=bs) + + theme(plot.title=element_text(face="bold",size=bs+0.5), + plot.subtitle=element_text(size=bs-1,color="grey40",margin=margin(b=4)), + panel.grid=element_line(color="grey94"), + legend.key.width=unit(0.35,"cm"), + legend.title=element_text(size=bs-1), + legend.text=element_text(size=bs-1.5), + axis.text=element_text(size=bs-2)) + +plot_field <- function(result, transcripts=NULL, title="", subtitle="", + palette="magma", log_scale=FALSE, + show_pts=TRUE, show_contours=TRUE, n_contours=6L, + pt_size=0.35, pt_alpha=0.45, pt_color="#00e5ff", + fill_label="Conc.", symmetric=FALSE) { + df <- field_to_df(result, inside_only=TRUE) + fill_col <- if (log_scale) { df$fv <- log1p(df$field); "fv" } else "field" + fill_lbl <- if (log_scale) paste0("log\u2081\u208a(",fill_label,")") else fill_label + p <- ggplot(df, aes(x=x,y=y,fill=.data[[fill_col]])) + + geom_raster(interpolate=TRUE) + coord_equal() + + labs(title=title, subtitle=subtitle, x="X", y="Y") + theme_demo() + if (symmetric) { + lim <- max(abs(df[[fill_col]]),na.rm=TRUE) + p <- p + scale_fill_distiller(palette="RdBu",limits=c(-lim,lim), + name=fill_lbl,na.value="transparent") + } else { + p <- p + scale_fill_viridis_c(option=palette,name=fill_lbl,na.value="transparent") + } + if (show_contours && any(!is.na(df$field))) + p <- p + geom_contour(aes(z=field),color="white", + alpha=0.35,linewidth=0.25,bins=n_contours) + if (show_pts && !is.null(transcripts)) + p <- p + geom_point(data=transcripts,aes(x=x,y=y),inherit.aes=FALSE, + color=pt_color,size=pt_size,alpha=pt_alpha) + p +} + +sample_in_mask <- function(mask_poly, n, max_tries=20) { + bb <- sf::st_bbox(mask_poly); mu <- sf::st_union(mask_poly) + pts <- data.frame(x=numeric(0),y=numeric(0)) + for (i in seq_len(max_tries)) { + if (nrow(pts)>=n) break + cnd <- data.frame(x=runif(n*8,bb["xmin"],bb["xmax"]), + y=runif(n*8,bb["ymin"],bb["ymax"])) + ok <- as.logical(sf::st_within( + sf::st_as_sf(cnd,coords=c("x","y")),mu,sparse=FALSE)[,1L]) + pts <- rbind(pts,cnd[ok,]) + } + pts[seq_len(min(n,nrow(pts))),] +} + +# ---- Shared helpers ---------------------------------------------------------- + +plot_mask <- function(mask, coords, title = "", subtitle = "", + fill = "#4c9be8", border = "#1a5fa8", + pt_col = "#c0392b", pt_size = 0.9, pt_alpha = 0.5) { + # geom_sf is required for correct hole rendering. + # geom_polygon draws a line between consecutive rings in the same group, + # producing diagonal artefacts across hole interiors. + mask_sf <- sf::st_sf(geometry = mask) + ggplot() + + geom_sf(data = mask_sf, + fill = fill, alpha = 0.2, color = border, linewidth = 0.55) + + geom_point(data = coords, aes(x = x, y = y), + color = pt_col, size = pt_size, alpha = pt_alpha) + + coord_sf(datum = NA) + # datum=NA: suppress geographic graticule + labs(title = title, subtitle = subtitle, x = "X", y = "Y") + + theme_minimal(base_size = 9.5) + + theme(plot.title = element_text(face = "bold", size = 9.5), + plot.subtitle = element_text(size = 8, color = "grey40"), + panel.grid = element_line(color = "grey93")) +} + +# ---- apply to real world data ---- +# load xenium object +load("~/Large Files/Pneumonectomy_Project_transfer/MSBR_polishing_2025-11-01/outputs/PPLR.polished.with.territory.and.neighborhood.2025-11-17.Robj") + +# subset out just one TMA puck +PPLR.polished <- UpdateSeuratObject(PPLR.polished) +Idents(PPLR.polished) <- PPLR.polished$TMA +table(Idents(PPLR.polished)) +temp <- subset(PPLR.polished, + idents = 'PNX7_M1_Sp') +temp + +# get centroid xy coordinates +tissue_pts <- data.frame(x = temp$x, + y = temp$y) +# make tissue mask +tissue_mask <- fit_spatial_mask(tissue_pts, + method="raster", + raster_resolution=256L, + raster_sigma=1, + raster_threshold=0.75, + verbose=TRUE, + buffer_dist = 3, + smooth_mask = T, + smooth_tolerance = 3) +# plot mask +plot_mask(tissue_mask, + tissue_pts, + title = "PNX7_M1_Sp", + subtitle = "raster_resolution = 300, raster_sigma = 1, raster_threshold = 0.75, smooth = T, smooth_tolerance = 3, buffer = 3") + +# get transcript xy coordinates +molecules <- temp[['PNX7_M1_Sp']][['molecules']] +tc_Areg <- molecules$Areg@coords +tc_Vegfa <- molecules$Vegfa@coords + +# inspect +tissue_sf <- sf::st_sf(geometry = tissue_mask) +print(ggplot() + + geom_sf(data=tissue_sf, fill="grey92", color="grey60", linewidth=0.4) + + geom_point(data=tc_Areg,aes(x=x,y=y,color="Areg"),alpha=0.6,size=0.1) + + geom_point(data=tc_Vegfa,aes(x=x,y=y,color="Vegfa"),alpha=0.5,size=0.1) + + scale_color_manual(values=c("Areg"="#e74c3c","Vegfa"="#2980b9"),name=NULL) + + #scale_size_continuous(range=c(0.3,2.5),name="log(UMI+1)") + + coord_sf(datum=NA) + theme_demo() + + labs(title="PNX7_M1_Sp", + subtitle="Areg | Vegfa",x="X",y="Y")) + +print(ggplot() + + geom_sf(data=tissue_sf, fill="grey92", color="grey60", linewidth=0.4) + + geom_point(data=tc_Areg,aes(x=x,y=y,color="Areg"),alpha=0.6,size=0.1) + + geom_point(data=tc_Vegfa,aes(x=x,y=y,color="Vegfa"),alpha=0.5,size=0.1) + + scale_color_manual(values=c("Areg"="#e74c3c","Vegfa"="#2980b9"),name=NULL) + + #scale_size_continuous(range=c(0.3,2.5),name="log(UMI+1)") + + coord_sf(datum=NA) + theme_demo() + + labs(title="PNX7_M1_Sp", + subtitle="Areg | Vegfa",x="X",y="Y"))+xlim(6000,6500)+ylim(11500,12000) + +# ---- Section 2: Method comparison ---- + +cat("Section 2: Method comparison...\n") +pb <- list(D=1, + diffusion_length = 10, + lambda=0.3, + production_rate=1, + grid_resolution=256L, + verbose=FALSE) + +res_fd <- do.call(estimate_concentration_field, + c(list(mask=tissue_mask, + transcript_coords=tc_Vegfa, + method="fd", + fd_solver="direct", + boundary_condition="dirichlet"),pb)) +res_green <- do.call(estimate_concentration_field, + c(list(mask=tissue_mask, + transcript_coords=tc_Vegfa, + method="green"),pb)) +res_kde <- do.call(estimate_concentration_field, + c(list(mask=tissue_mask, + transcript_coords=tc_Vegfa, + method="kde", + kde_bandwidth=sqrt(1/0.3)),pb)) +res_diff <- res_fd; +res_diff$field <- res_fd$field - res_green$field + +print((plot_field(res_fd,tc_Vegfa,title="2a. fd (Dirichlet)", + subtitle="Exact BCs; recommended", + show_pts = F)+ + xlim(6000,6500)+ + ylim(11500,12000) + + plot_field(res_green,tc_Vegfa,title="2b. green (FFT)", + subtitle="Infinite domain; no BCs", + show_pts = F)+ + xlim(6000,6500)+ + ylim(11500,12000)) / + (plot_field(res_kde,tc_Vegfa,title="2c. kde", + subtitle="Bandwidth=L; phenomenological", + show_pts = F)+ + xlim(6000,6500)+ + ylim(11500,12000) + + plot_field(res_diff,title="2d. fd \u2212 green", + subtitle="BCs suppress near-edge conc.", + symmetric=TRUE, + show_contours=FALSE, + show_pts=FALSE) + + xlim(6000,6500)+ + ylim(11500,12000) + + plot_annotation(title="Section 2: Method Comparison (Gene A, D=1, \u03bb=0.3)", + theme=theme(plot.title=element_text(face="bold",size=13))))) + + + diff --git a/spatial-modeling-with-claude/demo_concentration_field.R b/spatial-modeling-with-claude/demo_concentration_field.R new file mode 100644 index 0000000..f332d92 --- /dev/null +++ b/spatial-modeling-with-claude/demo_concentration_field.R @@ -0,0 +1,486 @@ +# ============================================================ +# demo_concentration_field.R +# +# Comprehensive demonstration of estimate_concentration_field(). +# +# Run after sourcing: +# source("fit_spatial_mask.R") +# source("estimate_concentration_field.R") +# +# Required packages: +# install.packages(c("sf","concaveman","MASS","isoband", +# "Matrix","ggplot2","patchwork")) +# +# Sections: +# 1 — Synthetic tissue + raw transcript layout +# 2 — Method comparison: fd / green / kde +# 3 — Diffusion length L sweep (most important parameter) +# 4 — Fixed L, varying D and lambda +# 5 — Dirichlet vs. Neumann boundary conditions +# 6 — UMI weighting vs. equal weighting +# 7 — External transcripts (include_external) +# 8 — Holey mask: vascular clearance topology +# 9 — Wide dynamic range + log1p visualisation +# 10 — Two-gene log2(A/B) ratio map +# 11 — 100k transcripts + solver timing comparison +# 12 — Quantitative field extraction +# ============================================================ + +library(sf) +library(ggplot2) +library(patchwork) +source("fit_spatial_mask.R") +source("estimate_concentration_field.R") +set.seed(2024) + +# ---- shared theme ---- +theme_demo <- function(bs=9.5) + theme_minimal(base_size=bs) + + theme(plot.title=element_text(face="bold",size=bs+0.5), + plot.subtitle=element_text(size=bs-1,color="grey40",margin=margin(b=4)), + panel.grid=element_line(color="grey94"), + legend.key.width=unit(0.35,"cm"), + legend.title=element_text(size=bs-1), + legend.text=element_text(size=bs-1.5), + axis.text=element_text(size=bs-2)) + +plot_field <- function(result, transcripts=NULL, title="", subtitle="", + palette="magma", log_scale=FALSE, + show_pts=TRUE, show_contours=TRUE, n_contours=6L, + pt_size=0.35, pt_alpha=0.45, pt_color="#00e5ff", + fill_label="Conc.", symmetric=FALSE) { + df <- field_to_df(result, inside_only=TRUE) + fill_col <- if (log_scale) { df$fv <- log1p(df$field); "fv" } else "field" + fill_lbl <- if (log_scale) paste0("log\u2081\u208a(",fill_label,")") else fill_label + p <- ggplot(df, aes(x=x,y=y,fill=.data[[fill_col]])) + + geom_raster(interpolate=TRUE) + coord_equal() + + labs(title=title, subtitle=subtitle, x="X", y="Y") + theme_demo() + if (symmetric) { + lim <- max(abs(df[[fill_col]]),na.rm=TRUE) + p <- p + scale_fill_distiller(palette="RdBu",limits=c(-lim,lim), + name=fill_lbl,na.value="transparent") + } else { + p <- p + scale_fill_viridis_c(option=palette,name=fill_lbl,na.value="transparent") + } + if (show_contours && any(!is.na(df$field))) + p <- p + geom_contour(aes(z=field),color="white", + alpha=0.35,linewidth=0.25,bins=n_contours) + if (show_pts && !is.null(transcripts)) + p <- p + geom_point(data=transcripts,aes(x=x,y=y),inherit.aes=FALSE, + color=pt_color,size=pt_size,alpha=pt_alpha) + p +} + +sample_in_mask <- function(mask_poly, n, max_tries=20) { + bb <- sf::st_bbox(mask_poly); mu <- sf::st_union(mask_poly) + pts <- data.frame(x=numeric(0),y=numeric(0)) + for (i in seq_len(max_tries)) { + if (nrow(pts)>=n) break + cnd <- data.frame(x=runif(n*8,bb["xmin"],bb["xmax"]), + y=runif(n*8,bb["ymin"],bb["ymax"])) + ok <- as.logical(sf::st_within( + sf::st_as_sf(cnd,coords=c("x","y")),mu,sparse=FALSE)[,1L]) + pts <- rbind(pts,cnd[ok,]) + } + pts[seq_len(min(n,nrow(pts))),] +} + + +# ============================================================ +# SECTION 1 — Synthetic tissue +# ============================================================ +cat("Section 1: Building tissue...\n") + +in_kidney <- function(x,y,oa=9,ob=7,ncx=5,nr=3.8) + (x/oa)^2+(y/ob)^2<1 & sqrt((x-ncx)^2+y^2)>=nr + +cands <- data.frame(x=runif(18000,-11,11),y=runif(18000,-9,9)) +tissue_pts <- cands[in_kidney(cands$x,cands$y),][1:3000,] +tissue_mask <- fit_spatial_mask(tissue_pts, method="raster", + raster_resolution=256L, raster_sigma=0.7, raster_threshold=0.15, verbose=FALSE) + +mu_t <- sf::st_union(tissue_mask) + +tc_A_cl <- data.frame(x=rnorm(350,-4.5,1.2), y=rnorm(350,3.8,0.9)) +A_in <- as.logical(sf::st_within( + sf::st_as_sf(tc_A_cl,coords=c("x","y")),mu_t,sparse=FALSE)[,1L]) +tc_A_cl <- tc_A_cl[A_in,] +tc_A_sc <- sample_in_mask(tissue_mask,50) +tc_A <- rbind(tc_A_cl,tc_A_sc) +tc_A$umi <- c(rpois(nrow(tc_A_cl),15), rpois(nrow(tc_A_sc),2)) + +tc_B_all <- sample_in_mask(tissue_mask,2000) +prob_B <- plogis(tc_B_all$x/3) +tc_B <- tc_B_all[runif(nrow(tc_B_all))=length(xg)||yi<1||yi>=length(yg)) return(NA_real_) + wx<-(qx-xg[xi])/(xg[xi+1]-xg[xi]); wy<-(qy-yg[yi])/(yg[yi+1]-yg[yi]) + (1-wx)*(1-wy)*f[xi,yi]+wx*(1-wy)*f[xi+1,yi]+(1-wx)*wy*f[xi,yi+1]+wx*wy*f[xi+1,yi+1] + }, numeric(1)) +} + +queries <- data.frame( + label=c("Cluster core","Cluster edge","Tissue centre","Far right","Near notch"), + x=c(-4.5,-3.0,0.0,5.0,3.0), y=c(3.8,2.5,0.0,0.0,0.0)) +queries$concentration <- interpolate_field(res_q, queries) + +cat("\n 12a. Concentration at query points:\n") +print(queries[,c("label","x","y","concentration")], row.names=FALSE) + +df_q <- field_to_df(res_q) +cat(sprintf("\n 12b. Mean conc. by half:\n Left : %.5f\n Right: %.5f\n", + mean(df_q$field[df_q$x< 0 & !is.na(df_q$field)]), + mean(df_q$field[df_q$x>=0 & !is.na(df_q$field)]))) + +pk <- which.max(res_q$field); N <- length(res_q$x) +cat(sprintf("\n 12c. Peak: x=%.2f, y=%.2f, value=%.6f\n", + res_q$x[((pk-1L)%%N)+1L], res_q$y[((pk-1L)%/%N)+1L], res_q$field[pk])) + +total <- sum(res_q$field[res_q$mask],na.rm=TRUE)*res_q$hx*res_q$hy +theory <- nrow(tc_A)*1.0/0.3 +cat(sprintf("\n 12d. Field integral = %.4f\n", total)) +cat(sprintf(" Theory (n*r/lam) = %.4f\n", theory)) +cat(sprintf(" Agreement = %.1f%%\n", 100*(1-abs(total-theory)/theory))) + +prof <- df_q[abs(df_q$y) 400.\n") +cat(strrep("=",60),"\n") + + + + + + + diff --git a/spatial-modeling-with-claude/estimate_concentration_field.R b/spatial-modeling-with-claude/estimate_concentration_field.R new file mode 100644 index 0000000..ef29a9a --- /dev/null +++ b/spatial-modeling-with-claude/estimate_concentration_field.R @@ -0,0 +1,323 @@ +# ============================================================ +# estimate_concentration_field.R +# +# Models steady-state continuous molecular concentration over a +# spatial mask from individual mRNA transcript point sources. +# +# Physical model: D * nabla^2 C(x) - lambda * C(x) + s(x) = 0 +# +# Three solvers: +# "fd" — finite-difference sparse linear system (recommended) +# "green" — FFT convolution with 2D Green's function K0(kappa*r) +# "kde" — Gaussian kernel smoothing (phenomenological baseline) +# +# install.packages(c("sf","Matrix","ggplot2")) +# ============================================================ + +.gauss_kernel_1d <- function(sigma) { + r <- max(1L,ceiling(3*sigma)); k <- exp(-(seq(-r,r))^2/(2*sigma^2)); k/sum(k) +} + +.sep_gauss_smooth_2d <- function(mat, sigma_x, sigma_y) { + kx <- .gauss_kernel_1d(sigma_x); ky <- .gauss_kernel_1d(sigma_y) + out <- apply(mat,2L,function(col){s<-stats::filter(col,kx,method="convolution",sides=2L);s[is.na(s)]<-0;as.numeric(s)}) + t(apply(out,1L,function(row){s<-stats::filter(row,ky,method="convolution",sides=2L);s[is.na(s)]<-0;as.numeric(s)})) +} + +.rasterize_mask <- function(gc_sf, mask_union, N, n_cores) { + n_cells <- N*N + if (n_cores>1L && .Platform$OS.type=="unix") { + chunk_size <- ceiling(n_cells/n_cores) + chunks <- split(seq_len(n_cells),ceiling(seq_len(n_cells)/chunk_size)) + res_list <- parallel::mclapply(chunks,function(idx) + as.logical(sf::st_within(gc_sf[idx,],mask_union,sparse=FALSE)[,1L]),mc.cores=n_cores) + in_msk <- unlist(res_list) + } else { + in_msk <- as.logical(sf::st_within(gc_sf,mask_union,sparse=FALSE)[,1L]) + } + matrix(in_msk,nrow=N,ncol=N) +} + +estimate_concentration_field <- function( + mask, + transcript_coords, + D = 1.0, + lambda = 0.1, + production_rate = 1.0, + diffusion_length = NULL, + weight_col = NULL, + method = "fd", + grid_resolution = 256L, + boundary_condition = "dirichlet", + fd_solver = "direct", + sor_omega = 1.7, + sor_tol = 1e-6, + sor_max_iter = 1500L, + green_r_min_factor = 0.5, + kde_bandwidth = NULL, + include_external = FALSE, + normalize = FALSE, + log_transform = FALSE, + clip_negative = TRUE, + n_cores = 1L, + return_sources = TRUE, + plot = FALSE, + verbose = TRUE +) { + t0 <- proc.time()["elapsed"] + + # 0. Packages + req <- "sf"; if (method=="fd"&&fd_solver=="direct") req <- c(req,"Matrix") + miss <- req[!sapply(req,requireNamespace,quietly=TRUE)] + if (length(miss)>0L) stop("Install: install.packages(c(",paste0('"',miss,'"',collapse=","),"))") + + # 1. Physics + if (!is.null(diffusion_length)) { + L <- as.numeric(diffusion_length); kappa2 <- 1/L^2; kappa <- 1/L + } else { + if (lambda<=0 && method!="kde") + stop("`lambda` must be > 0 for a finite steady-state field.") + if (lambda>0) { L <- sqrt(D/lambda); kappa2 <- lambda/D; kappa <- sqrt(kappa2) + } else { L <- Inf; kappa2 <- 0; kappa <- 0 } + } + N <- as.integer(grid_resolution); nx_c <- as.integer(n_cores) + + if (verbose) { + cat("============================================================\n") + cat(" estimate_concentration_field\n") + cat("============================================================\n") + cat(sprintf(" Method: %s | D: %g | lambda: %g | L: %g | N: %d x %d\n", + method,D,lambda,L,N,N)) + if (method=="fd") cat(sprintf(" BC: %s | solver: %s\n",boundary_condition,fd_solver)) + cat("------------------------------------------------------------\n") + } + + # 2. Parse transcripts + if (is.matrix(transcript_coords)) transcript_coords <- as.data.frame(transcript_coords) + if (!all(c("x","y") %in% names(transcript_coords))) names(transcript_coords)[1:2] <- c("x","y") + tc <- transcript_coords[complete.cases(transcript_coords[,c("x","y")]),] + weights <- if (!is.null(weight_col)) tc[[weight_col]]*production_rate else + rep(production_rate,nrow(tc)) + + # 3. Filter by mask + mu_sf <- sf::st_union(mask) + tc_sf <- sf::st_as_sf(tc[,c("x","y")],coords=c("x","y"),crs=sf::st_crs(mask)) + in_msk <- as.logical(sf::st_within(tc_sf,mu_sf,sparse=FALSE)[,1L]) + n_tot <- nrow(tc); n_ext <- sum(!in_msk) + if (!include_external) { tc <- tc[in_msk,]; weights <- weights[in_msk] } + n_src <- nrow(tc) + if (n_src==0L) stop("No transcripts available. Check coords or set include_external=TRUE.") + if (verbose) cat(sprintf(" Transcripts: %d used (%d external %s)\n", + n_src, n_ext, if(include_external)"included" else "excluded")) + + # 4. Grid + bb <- sf::st_bbox(mu_sf); pad <- 0.03 + xpad <- (bb["xmax"]-bb["xmin"])*pad; ypad <- (bb["ymax"]-bb["ymin"])*pad + x_breaks <- seq(bb["xmin"]-xpad,bb["xmax"]+xpad,length.out=N+1L) + y_breaks <- seq(bb["ymin"]-ypad,bb["ymax"]+ypad,length.out=N+1L) + x_centers <- (x_breaks[-1L]+x_breaks[-(N+1L)])*0.5 + y_centers <- (y_breaks[-1L]+y_breaks[-(N+1L)])*0.5 + hx <- x_centers[2L]-x_centers[1L]; hy <- y_centers[2L]-y_centers[1L]; h <- (hx+hy)*0.5 + + # 5. Rasterize mask + if (verbose) cat(sprintf(" Rasterizing %dx%d grid...\n",N,N)) + gdf <- expand.grid(x=x_centers,y=y_centers) + gc_sf <- sf::st_as_sf(gdf,coords=c("x","y"),crs=sf::st_crs(mask)) + mask_mat <- .rasterize_mask(gc_sf,mu_sf,N,nx_c) + n_inside <- sum(mask_mat) + if (verbose) cat(sprintf(" Inside-mask cells: %d (%.1f%%)\n",n_inside,100*n_inside/N^2)) + + # 6. Bin sources + xi_s <- pmax(1L,pmin(N,findInterval(tc$x,x_breaks,rightmost.closed=TRUE))) + yi_s <- pmax(1L,pmin(N,findInterval(tc$y,y_breaks,rightmost.closed=TRUE))) + lin_s <- xi_s+(yi_s-1L)*N + src_vec <- numeric(N*N) + wt_agg <- rowsum(matrix(weights,ncol=1L),lin_s) + src_vec[as.integer(rownames(wt_agg))] <- wt_agg[,1L] + source_mat <- matrix(src_vec,nrow=N,ncol=N) + + # 7. Solve + t_solve <- proc.time()["elapsed"] + + field_mat <- switch(method, + + "fd" = { + inside_lin <- which(mask_mat); K <- length(inside_lin) + cell2sys <- integer(N*N); cell2sys[inside_lin] <- seq_len(K) + xi_in <- ((inside_lin-1L)%%N)+1L; yi_in <- ((inside_lin-1L)%/%N)+1L + nbr_xm <- ifelse(xi_in>1L,inside_lin-1L,0L) + nbr_xp <- ifelse(xi_in1L,inside_lin-N, 0L) + nbr_yp <- ifelse(yi_in0L&nb<=N*N;res<-logical(length(nb));res[ok]<-mask_mat[nb[ok]];res} + nbr_list <- list(nbr_xm,nbr_xp,nbr_ym,nbr_yp); wt_list <- c(nx_w,nx_w,ny_w,ny_w) + + if (fd_solver=="direct") { + diag_vals <- rep(-(2*nx_w+2*ny_w+kappa2),K) + off_rows <- integer(0); off_cols <- integer(0); off_vals <- numeric(0) + for (d in seq_along(nbr_list)) { + nb <- nbr_list[[d]]; wt <- wt_list[[d]] + is_in <- is_in_mask(nb); is_out <- (nb>0L)&!is_in; is_oob <- (nb==0L) + sel <- which(is_in) + if (length(sel)>0L){off_rows<-c(off_rows,sel);off_cols<-c(off_cols,cell2sys[nb[sel]]);off_vals<-c(off_vals,rep(wt,length(sel)))} + if (boundary_condition=="neumann") { + nc2 <- which(is_out|is_oob); diag_vals[nc2] <- diag_vals[nc2]+wt + } + } + A <- Matrix::sparseMatrix(i=c(seq_len(K),off_rows),j=c(seq_len(K),off_cols), + x=c(diag_vals,off_vals),dims=c(K,K)) + b_vec <- -source_mat[inside_lin]/(D*hx*hy) + if (verbose) cat(sprintf(" Sparse system: %dx%d, %d nnz\n",K,K,length(A@x))) + c_vec <- as.numeric(Matrix::solve(A,b_vec)) + out <- matrix(NA_real_,N,N); out[inside_lin] <- c_vec; out + + } else { + if (boundary_condition=="neumann") message("SOR uses Dirichlet BC only.") + xi_grid <- matrix(rep(seq_len(N),N),N,N) + yi_grid <- matrix(rep(seq_len(N),each=N),N,N) + is_red <- ((xi_grid+yi_grid)%%2L)==0L + ins_red <- mask_mat& is_red; ins_blk <- mask_mat&!is_red + C <- matrix(0,N,N); denom <- 2*nx_w+2*ny_w+kappa2 + src_dens <- source_mat/(D*hx*hy); converged <- FALSE + for (iter in seq_len(sor_max_iter)) { + C_prev <- C + Cxm <- rbind(matrix(0,1,N),C[1:(N-1),]); Cxp <- rbind(C[2:N,],matrix(0,1,N)) + Cym <- cbind(matrix(0,N,1),C[,1:(N-1)]); Cyp <- cbind(C[,2:N],matrix(0,N,1)) + Cgs <- (nx_w*(Cxm+Cxp)+ny_w*(Cym+Cyp)+src_dens)/denom + C[ins_red] <- (1-sor_omega)*C[ins_red]+sor_omega*Cgs[ins_red]; C[!mask_mat]<-0 + Cxm <- rbind(matrix(0,1,N),C[1:(N-1),]); Cxp <- rbind(C[2:N,],matrix(0,1,N)) + Cym <- cbind(matrix(0,N,1),C[,1:(N-1)]); Cyp <- cbind(C[,2:N],matrix(0,N,1)) + Cgs <- (nx_w*(Cxm+Cxp)+ny_w*(Cym+Cyp)+src_dens)/denom + C[ins_blk] <- (1-sor_omega)*C[ins_blk]+sor_omega*Cgs[ins_blk]; C[!mask_mat]<-0 + delta <- max(abs(C[mask_mat]-C_prev[mask_mat])) + if (is.finite(delta)&&delta0.") + Np <- 2L*N + xi_k <- c(0L:(N-1L),(-N):(-1L)); yi_k <- c(0L:(N-1L),(-N):(-1L)) + dx_mat <- outer(xi_k,rep(1L,Np))*hx; dy_mat <- outer(rep(1L,Np),yi_k)*hy + r_mat <- sqrt(dx_mat^2+dy_mat^2); r_min <- green_r_min_factor*h + r_mat[r_matfmn) field_mat <- (field_mat-fmn)/(fmx-fmn) + } + + t_total <- proc.time()["elapsed"]-t0 + if (verbose) { + cat(sprintf(" Solve: %.2fs | Total: %.2fs | Range: [%.4g, %.4g]\n", + t_solve_end-t_solve, t_total, + min(field_mat,na.rm=TRUE),max(field_mat,na.rm=TRUE))) + cat("============================================================\n") + } + + # 9. Plot + if (plot && requireNamespace("ggplot2",quietly=TRUE)) { + gdf <- expand.grid(x=x_centers,y=y_centers); gdf$field <- as.vector(field_mat) + gdf <- gdf[!is.na(gdf$field),] + print(ggplot2::ggplot(gdf,ggplot2::aes(x=x,y=y,fill=field))+ + ggplot2::geom_raster(interpolate=TRUE)+ + ggplot2::scale_fill_viridis_c(option="magma",name="Conc.",na.value="transparent")+ + ggplot2::coord_equal()+ggplot2::theme_minimal(base_size=11)+ + ggplot2::labs(title=paste0("Concentration | method='",method,"'"),x="X",y="Y")) + } + + # 10. Return + list( + field = field_mat, + x = x_centers, y = y_centers, hx = hx, hy = hy, + mask = mask_mat, + sources = if (return_sources) source_mat else NULL, + params = list(D=D,lambda=lambda,kappa2=kappa2,diffusion_length=L, + production_rate=production_rate,method=method, + grid_resolution=N,boundary_condition=if(method=="fd")boundary_condition else NA, + fd_solver=if(method=="fd")fd_solver else NA, + include_external=include_external,log_transform=log_transform,normalize=normalize), + diagnostics = list(n_transcripts_total=n_tot,n_transcripts_used=n_src, + n_transcripts_external=n_ext,n_cells_inside_mask=n_inside, + elapsed_total_s=t_total,elapsed_solve_s=t_solve_end-t_solve) + ) +} + +# ============================================================ +# UTILITY FUNCTIONS +# ============================================================ + +field_to_df <- function(result, inside_only=TRUE) { + df <- expand.grid(x=result$x,y=result$y) + df$field <- as.vector(result$field) + df$inside <- as.vector(result$mask) + if (!is.null(result$sources)) df$source <- as.vector(result$sources) + if (inside_only) df <- df[df$inside,] + df +} + +plot_concentration_field <- function( + result, transcript_coords=NULL, + show_sources=TRUE, show_contours=FALSE, n_contours=8L, + palette="magma", interpolate=TRUE, log_scale=FALSE, + pt_size=0.4, pt_alpha=0.4, pt_color="#00e5ff", title=NULL) { + if (!requireNamespace("ggplot2",quietly=TRUE)) + stop("Requires ggplot2.") + df <- field_to_df(result,inside_only=TRUE) + p <- result$params; L_str <- if(is.finite(p$diffusion_length)) round(p$diffusion_length,3) else "Inf" + tit <- if(is.null(title)) sprintf("Concentration | %s | D=%g, lambda=%g, L=%s", + p$method,p$D,p$lambda,L_str) else title + fc <- if (log_scale){df$lf<-log1p(df$field);"lf"} else "field" + g <- ggplot2::ggplot(df,ggplot2::aes(x=x,y=y,fill=.data[[fc]]))+ + ggplot2::geom_raster(interpolate=interpolate)+ + ggplot2::scale_fill_viridis_c(option=palette, + name=if(log_scale)"log1p(C)" else "Conc.", + na.value="transparent")+ + ggplot2::coord_equal()+ + ggplot2::labs(title=tit,x="X",y="Y")+ + ggplot2::theme_minimal(base_size=11)+ + ggplot2::theme(plot.title=ggplot2::element_text(face="bold")) + if (show_contours) + g <- g+ggplot2::geom_contour(data=df,ggplot2::aes(z=field), + color="white",alpha=0.4,linewidth=0.3,bins=n_contours) + if (show_sources && !is.null(transcript_coords)) + g <- g+ggplot2::geom_point(data=transcript_coords,ggplot2::aes(x=x,y=y), + inherit.aes=FALSE,color=pt_color,size=pt_size,alpha=pt_alpha) + g +} + +sweep_diffusion_length <- function(L_values, mask, transcript_coords, ...) { + do.call(rbind, lapply(L_values, function(Lv) { + cat(sprintf("\n--- Sweeping L = %g ---\n",Lv)) + res <- estimate_concentration_field(mask=mask,transcript_coords=transcript_coords, + diffusion_length=Lv,verbose=FALSE,...) + df <- field_to_df(res,inside_only=TRUE); df$L <- Lv; df + })) +} diff --git a/spatial-modeling-with-claude/vignette_concentration_field.Rmd b/spatial-modeling-with-claude/vignette_concentration_field.Rmd new file mode 100644 index 0000000..009a8f8 --- /dev/null +++ b/spatial-modeling-with-claude/vignette_concentration_field.Rmd @@ -0,0 +1,908 @@ +--- +title: "Estimating Steady-State Molecular Concentration Fields" +subtitle: "A guide to `estimate_concentration_field()`: physics, solvers, and tuning" +author: "Raredon Laboratory · Yale School of Medicine" +date: "`r Sys.Date()`" +output: + html_document: + toc: true + toc_float: + collapsed: false + smooth_scroll: true + toc_depth: 3 + theme: flatly + highlight: kate + code_folding: show + fig_width: 10 + fig_height: 5 + df_print: kable +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + echo = TRUE, + warning = FALSE, + message = FALSE, + fig.align = "center", + cache = FALSE +) +``` + +--- + +# Overview + +Spatial transcriptomics data gives us discrete transcript or cell locations, but many biological processes — ligand gradients, cytokine fields, metabolite concentrations — are continuous. `estimate_concentration_field()` bridges this gap by computing the **steady-state concentration field** that a set of point sources (mRNA transcripts, detected proteins) would produce under a physically motivated diffusion-clearance model. + +The result is a dense concentration map over the tissue, interpolated from point source locations. This can be used to: + +- Identify where a signalling molecule is most concentrated +- Compute the concentration seen by each cell +- Create two-gene ratio maps (spatial domains of ligand/receptor balance) +- Visualise how diffusion length affects signal range + +--- + +# The physical model + +The function solves the **screened Poisson equation** (also called the steady-state diffusion-clearance PDE): + +$$D \nabla^2 C(\mathbf{x}) - \lambda C(\mathbf{x}) + s(\mathbf{x}) = 0$$ + +where: + +| Symbol | Meaning | Parameter | +|---|---|---| +| $C(\mathbf{x})$ | Steady-state concentration at position **x** | — (what we solve for) | +| $D$ | Diffusion coefficient (spatial spread rate) | `D` | +| $\lambda$ | First-order clearance rate | `lambda` | +| $s(\mathbf{x})$ | Source density (production rate per unit area) | `production_rate` | + +The key derived quantity is the **diffusion length**: + +$$L = \sqrt{D / \lambda}$$ + +$L$ sets the characteristic distance over which concentration decays from a point source. It is the single most important parameter to tune. The shape of the field depends only on $L$; the amplitude scales as $1/D$. + +> **Practical interpretation:** $L$ is roughly the radius of influence of a single transcript. At distance $r \gg L$ from a source, the concentration is negligible. At $r \ll L$, concentration is near its maximum. + +--- + +# Setup + +```{r libraries} +library(sf) +library(ggplot2) +library(patchwork) +source("fit_spatial_mask.R") +source("estimate_concentration_field.R") + +set.seed(2024) +``` + +--- + +# Shared helpers + +```{r helpers} +# ---- Plot theme ---- +theme_demo <- function(bs = 9.5) + theme_minimal(base_size = bs) + + theme(plot.title = element_text(face = "bold", size = bs + 0.5), + plot.subtitle = element_text(size = bs - 1, color = "grey40", + margin = margin(b = 4)), + panel.grid = element_line(color = "grey94"), + legend.key.width = unit(0.35, "cm"), + legend.title = element_text(size = bs - 1), + legend.text = element_text(size = bs - 1.5), + axis.text = element_text(size = bs - 2)) + +# ---- Field visualiser ---- +# Uses geom_raster (correct for continuous fields on regular grids). +# show_contours overlays iso-concentration lines. +plot_field <- function(result, transcripts = NULL, + title = "", subtitle = "", + palette = "magma", log_scale = FALSE, + show_pts = TRUE, show_contours = TRUE, n_contours = 6L, + pt_size = 0.35, pt_alpha = 0.45, pt_color = "#00e5ff", + fill_label = "Conc.", symmetric = FALSE) { + df <- field_to_df(result, inside_only = TRUE) + fill_col <- if (log_scale) { df$fv <- log1p(df$field); "fv" } else "field" + fill_lbl <- if (log_scale) paste0("log\u2081\u208a(", fill_label, ")") else fill_label + p <- ggplot(df, aes(x = x, y = y, fill = .data[[fill_col]])) + + geom_raster(interpolate = TRUE) + coord_equal() + + labs(title = title, subtitle = subtitle, x = "X", y = "Y") + + theme_demo() + if (symmetric) { + lim <- max(abs(df[[fill_col]]), na.rm = TRUE) + p <- p + scale_fill_distiller(palette = "RdBu", limits = c(-lim, lim), + name = fill_lbl, na.value = "transparent") + } else { + p <- p + scale_fill_viridis_c(option = palette, name = fill_lbl, + na.value = "transparent") + } + if (show_contours && any(!is.na(df$field))) + p <- p + geom_contour(aes(z = field), color = "white", + alpha = 0.35, linewidth = 0.25, bins = n_contours) + if (show_pts && !is.null(transcripts)) + p <- p + geom_point(data = transcripts, aes(x = x, y = y), + inherit.aes = FALSE, color = pt_color, + size = pt_size, alpha = pt_alpha) + p +} + +# ---- Sample points uniformly from inside a mask ---- +sample_in_mask <- function(mask_poly, n, max_tries = 20) { + bb <- sf::st_bbox(mask_poly); mu <- sf::st_union(mask_poly) + pts <- data.frame(x = numeric(0), y = numeric(0)) + for (i in seq_len(max_tries)) { + if (nrow(pts) >= n) break + cnd <- data.frame(x = runif(n * 8, bb["xmin"], bb["xmax"]), + y = runif(n * 8, bb["ymin"], bb["ymax"])) + ok <- as.logical(sf::st_within( + sf::st_as_sf(cnd, coords = c("x","y")), mu, sparse = FALSE)[, 1L]) + pts <- rbind(pts, cnd[ok, ]) + } + pts[seq_len(min(n, nrow(pts))), ] +} +``` + +--- + +# Function reference: `estimate_concentration_field()` + +```r +estimate_concentration_field( + mask, # sfc polygon — output of fit_spatial_mask() + transcript_coords, # data.frame with columns x and y (plus optional weight_col) + + # Physics + D = 1.0, # diffusion coefficient (arbitrary units) + lambda = 0.1, # first-order clearance rate + diffusion_length = NULL, # override: set L directly; NULL uses sqrt(D/lambda) + production_rate = 1.0, # multiplied by weights if weight_col is set + + # Solver + method = "fd", # "fd" | "green" | "kde" + grid_resolution = 256L, # grid cells per axis (N × N) + boundary_condition = "dirichlet", # "dirichlet" | "neumann" (fd only) + fd_solver = "direct", # "direct" | "iterative" (fd only) + + # Weighting + weight_col = NULL, # column name for per-transcript weights (e.g. "umi") + + # Optional + include_external = FALSE, # include transcripts outside mask? + normalize = FALSE, # rescale field to [0, 1]? + log_transform = FALSE, # apply log1p() after solving? + clip_negative = TRUE, # set small negatives to 0? + n_cores = 1L, + plot = FALSE, + verbose = TRUE +) +``` + +## Return value + +A named list containing: + +| Element | Contents | +|---|---| +| `field` | N×N matrix; `NA` outside the mask | +| `x`, `y` | Grid centre coordinates (length N each) | +| `hx`, `hy` | Grid cell width and height | +| `mask` | N×N logical matrix (TRUE = inside mask) | +| `sources` | N×N matrix of binned source density | +| `params` | List of all input parameters | +| `diagnostics` | Timing, counts, etc. | + +Use `field_to_df(result)` to convert to a tidy data frame for ggplot. + +--- + +# Section 1 — Synthetic tissue and transcript layout + +We build a kidney-bean-shaped tissue with two synthetic genes: + +- **Gene A**: focal cluster at upper-left, plus sparse background. High UMI count (focal source). +- **Gene B**: 500 transcripts with a right-biased spatial distribution. Low UMI count (diffuse source). + +```{r sec1-tissue} +in_kidney <- function(x, y, oa = 9, ob = 7, ncx = 5, nr = 3.8) + (x/oa)^2 + (y/ob)^2 < 1 & sqrt((x - ncx)^2 + y^2) >= nr + +cands <- data.frame(x = runif(18000, -11, 11), y = runif(18000, -9, 9)) +tissue_pts <- cands[in_kidney(cands$x, cands$y), ][1:3000, ] + +tissue_mask <- fit_spatial_mask(tissue_pts, method = "raster", + raster_resolution = 256L, raster_sigma = 0.7, raster_threshold = 0.15, + verbose = FALSE) + +mu_t <- sf::st_union(tissue_mask) +``` + +```{r sec1-transcripts} +# Gene A: focal cluster at (-4.5, 3.8) + sparse background +tc_A_cl <- data.frame(x = rnorm(350, -4.5, 1.2), y = rnorm(350, 3.8, 0.9)) +A_in <- as.logical(sf::st_within( + sf::st_as_sf(tc_A_cl, coords = c("x","y")), mu_t, sparse = FALSE)[, 1L]) +tc_A_cl <- tc_A_cl[A_in, ] +tc_A_sc <- sample_in_mask(tissue_mask, 50) +tc_A <- rbind(tc_A_cl, tc_A_sc) +tc_A$umi <- c(rpois(nrow(tc_A_cl), 15), rpois(nrow(tc_A_sc), 2)) + +# Gene B: right-biased random distribution +tc_B_all <- sample_in_mask(tissue_mask, 2000) +prob_B <- plogis(tc_B_all$x / 3) +tc_B <- tc_B_all[runif(nrow(tc_B_all)) < prob_B, ][1:500, ] +tc_B$umi <- rpois(nrow(tc_B), 5) +``` + +```{r sec1-plot, fig.height=5} +tissue_sf <- sf::st_sf(geometry = tissue_mask) + +ggplot() + + geom_sf(data = tissue_sf, fill = "grey92", color = "grey60", linewidth = 0.4) + + geom_point(data = tc_A, aes(x = x, y = y, color = "Gene A", size = log1p(umi)), + alpha = 0.6) + + geom_point(data = tc_B, aes(x = x, y = y, color = "Gene B", size = log1p(umi)), + alpha = 0.5) + + scale_color_manual(values = c("Gene A" = "#e74c3c", "Gene B" = "#2980b9"), + name = NULL) + + scale_size_continuous(range = c(0.3, 2.5), name = "log(UMI+1)") + + coord_sf(datum = NA) + theme_demo() + + labs(title = "Section 1: Synthetic kidney-bean tissue", + subtitle = "Gene A: focal upper-left | Gene B: broad right-biased", + x = "X", y = "Y") +``` + +--- + +# Section 2 — Method comparison: fd, green, kde + +Three solvers are available. Each makes different assumptions about the domain: + +| Method | How it works | Boundary handling | Best for | +|---|---|---|---| +| `"fd"` | Sparse finite-difference linear system | **Exact** Dirichlet or Neumann BCs | **Default; recommended** | +| `"green"` | FFT convolution with analytic Green's function $K_0(\kappa r)$ | **None** — infinite domain | Fast interior estimates; quick sweeps | +| `"kde"` | Gaussian kernel smoothing | None | Phenomenological baseline | + +```{r sec2, cache=TRUE} +pb <- list(D = 1, lambda = 0.3, production_rate = 1, + grid_resolution = 256L, verbose = FALSE) + +res_fd <- do.call(estimate_concentration_field, + c(list(mask = tissue_mask, transcript_coords = tc_A, + method = "fd", fd_solver = "direct", + boundary_condition = "dirichlet"), pb)) + +res_green <- do.call(estimate_concentration_field, + c(list(mask = tissue_mask, transcript_coords = tc_A, + method = "green"), pb)) + +res_kde <- do.call(estimate_concentration_field, + c(list(mask = tissue_mask, transcript_coords = tc_A, + method = "kde", kde_bandwidth = sqrt(1/0.3)), pb)) + +res_diff <- res_fd +res_diff$field <- res_fd$field - res_green$field +``` + +```{r sec2-plot, fig.height=8} +(plot_field(res_fd, tc_A, title = "2a. fd (Dirichlet)", + subtitle = "Exact BCs; recommended") + + plot_field(res_green, tc_A, title = "2b. green (FFT)", + subtitle = "Infinite domain; no boundary effects")) / +(plot_field(res_kde, tc_A, title = "2c. kde", + subtitle = "Bandwidth = L; phenomenological") + + plot_field(res_diff, title = "2d. fd − green", + subtitle = "fd correctly suppresses near-edge concentration", + symmetric = TRUE, show_contours = FALSE, show_pts = FALSE)) + +plot_annotation( + title = "Section 2: Solver Comparison (Gene A, D=1, λ=0.3, L≈1.83)", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +**Key observations:** + +- The `fd` and `green` maps look very similar in the interior (panel 2d shows only small differences away from the edges). This confirms that the physics is consistent between methods. +- The **difference map** (2d) shows that `fd` correctly drives concentration to zero at the tissue boundary (Dirichlet BC), while `green` ignores the boundary entirely. This matters most when sources are near the edge. +- `kde` produces a visually similar pattern to `fd` because the bandwidth was set equal to $L$, but it has no physical interpretation and no BCs. + +**When to use `method = "green"`:** When you are doing a rapid parameter sweep over many $L$ values and don't need exact boundary conditions. It runs faster than `fd` at high resolution. + +--- + +# Section 3 — Diffusion length sweep: the most important parameter + +The diffusion length $L = \sqrt{D/\lambda}$ is the primary parameter to tune. It determines how far each transcript's signal spreads. Small $L$: fields are tight and peaked around clusters. Large $L$: fields fill the entire tissue. + +```{r sec3, cache=TRUE} +L_vals <- c(0.5, 1.5, 4.0, 10.0) + +sw3 <- lapply(L_vals, function(Lv) { + res <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, diffusion_length = Lv, production_rate = 1, + method = "fd", fd_solver = "direct", grid_resolution = 256L, + verbose = FALSE) + # Normalise each panel independently for visual comparison + f <- res$field + res$field <- (f - min(f, na.rm = TRUE)) / diff(range(f, na.rm = TRUE)) + plot_field(res, tc_A, + title = sprintf("L = %.1f", Lv), + subtitle = sprintf("λ ≈ %.3f", 1/Lv^2), + fill_label = "Norm. Conc.", pt_size = 0.4) + + scale_fill_viridis_c(option = "magma", name = "Norm.\nConc.", + limits = c(0, 1), na.value = "transparent") +}) +``` + +```{r sec3-plot, fig.height=4} +wrap_plots(sw3, nrow = 1) + + plot_annotation( + title = "Section 3: Diffusion Length Sweep (each panel normalised independently)", + subtitle = "Small L → tight peaks | Large L → tissue-wide field", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +```{r sec3-peaks} +peaks <- sapply(L_vals, function(Lv) { + res <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, diffusion_length = Lv, + method = "fd", fd_solver = "direct", grid_resolution = 256L, + verbose = FALSE) + max(res$field, na.rm = TRUE) +}) +data.frame(L = L_vals, peak_concentration = round(peaks, 5)) +``` + +### How to choose L in practice + +There is no single correct answer — the right $L$ is biologically motivated. Ask: + +1. **What is the typical inter-cluster distance?** $L$ should be 10–20% of this distance if you want fields that peak at sources and drop between them. Larger values produce smoother, more "territorial" fields. +2. **Are you modelling a small diffusing molecule or a large one?** Small molecules (e.g. metabolites, small cytokines) have higher $D$ and thus larger $L$. Large molecules (e.g. ECM-bound ligands) have small $D$. +3. **Does your field look biologically reasonable?** Inspect the output and ask whether the gradients match known biology. + +> **Shortcut:** set `diffusion_length = L` directly to bypass the `D`/`lambda` decomposition. The shape depends only on $L$; $D$ only affects amplitude. + +--- + +# Section 4 — Fixed L, varying D and lambda + +This section makes the key mathematical point: **field shape is determined entirely by $L$**. Changing $D$ and $\lambda$ while keeping $L = \sqrt{D/\lambda}$ constant produces identical spatial patterns, only scaling the amplitude by $1/D$. + +```{r sec4, cache=TRUE} +DL_cases <- list( + list(D = 0.1, lam = 0.025, label = "D=0.1, λ=0.025"), + list(D = 1, lam = 0.25, label = "D=1, λ=0.25"), + list(D = 5, lam = 1.25, label = "D=5, λ=1.25"), + list(D = 20, lam = 5, label = "D=20, λ=5") +) + +dl_pl <- lapply(DL_cases, function(co) { + res <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = co$D, lambda = co$lam, production_rate = 1, + method = "fd", fd_solver = "direct", grid_resolution = 256L, + verbose = FALSE) + pk <- round(max(res$field, na.rm = TRUE), 5) + plot_field(res, tc_A, + title = co$label, + subtitle = sprintf("L=2 fixed | peak=%.5f", pk), + pt_size = 0.3) +}) +``` + +```{r sec4-plot, fig.height=4} +wrap_plots(dl_pl, nrow = 1) + + plot_annotation( + title = "Section 4: Fixed L=2, Varying D and λ", + subtitle = "Shape IDENTICAL (same L). Amplitude ∝ 1/D.", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +**Implication for parameter setting:** When fitting to data, first determine the spatial extent of influence ($L$), then fix either $D$ or $\lambda$ based on biophysical priors. The third parameter is then determined. + +--- + +# Section 5 — Boundary conditions + +The `fd` method supports two boundary conditions: + +- **Dirichlet** (`boundary_condition = "dirichlet"`): $C = 0$ at the tissue boundary. Physically: molecules are absorbed or degrade instantly at the boundary. This is appropriate for most spatial transcriptomics sections where the tissue edge is a hard barrier. +- **Neumann** (`boundary_condition = "neumann"`): $\partial C / \partial n = 0$ at the boundary (zero normal flux). Physically: no molecules cross the boundary — they are reflected back in. This is appropriate when modelling a sealed container. + +```{r sec5, cache=TRUE} +res_dir <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, lambda = 0.2, method = "fd", fd_solver = "direct", + boundary_condition = "dirichlet", grid_resolution = 256L, verbose = FALSE) + +res_neu <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, lambda = 0.2, method = "fd", fd_solver = "direct", + boundary_condition = "neumann", grid_resolution = 256L, verbose = FALSE) + +res_bcd <- res_dir +res_bcd$field <- res_neu$field - res_dir$field +``` + +```{r sec5-crosssection} +sl <- function(res, lbl) { + df <- field_to_df(res) + s <- df[abs(df$y) < res$hy * 1.5 & !is.na(df$field), ] + s <- s[order(s$x), ]; s$BC <- lbl; s +} +sl_all <- rbind(sl(res_dir, "Dirichlet"), sl(res_neu, "Neumann")) + +p5d <- ggplot(sl_all, aes(x = x, y = field, color = BC)) + + geom_line(linewidth = 0.8) + + scale_color_manual(values = c("Dirichlet" = "#e74c3c", + "Neumann" = "#2980b9")) + + labs(title = "5d. Cross-section at y ≈ 0", + subtitle = "Dirichlet drops to 0; Neumann stays elevated", + x = "X", y = "Conc.", color = "BC") + + theme_demo() +``` + +```{r sec5-plot, fig.height=8} +(plot_field(res_dir, tc_A, + title = "5a. Dirichlet (C=0 at boundary)", + subtitle = "D=1, λ=0.2, L≈2.24") + + plot_field(res_neu, tc_A, + title = "5b. Neumann (∂C/∂n=0)", + subtitle = "Molecules reflected; higher near edges", + palette = "plasma")) / +(plot_field(res_bcd, + title = "5c. Neumann − Dirichlet", + subtitle = "Neumann retains more concentration near boundary", + symmetric = TRUE, show_contours = FALSE, show_pts = FALSE) + + p5d) + +plot_annotation( + title = "Section 5: Boundary Condition Comparison", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +**Recommendation:** Use `"dirichlet"` for tissue sections. Neumann BCs are mainly useful when comparing to analytic solutions or modelling sealed compartments. + +--- + +# Section 6 — UMI weighting vs. equal weighting + +Each transcript can carry a weight (e.g. its UMI count) via the `weight_col` argument. This means a cell with 50 detected transcripts of a gene contributes 50× more source density than a cell with 1. + +```{r sec6, cache=TRUE} +res_uw <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, lambda = 0.3, method = "fd", fd_solver = "direct", + grid_resolution = 256L, weight_col = NULL, verbose = FALSE) + +res_wt <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, lambda = 0.3, method = "fd", fd_solver = "direct", + grid_resolution = 256L, weight_col = "umi", verbose = FALSE) + +# Normalise both for visual comparison +nrm <- function(r) { + f <- r$field + r$field <- (f - min(f, na.rm=TRUE)) / diff(range(f, na.rm=TRUE)) + r +} +``` + +```{r sec6-plot, fig.height=4} +plot_field(nrm(res_uw), tc_A, + title = "6a. Unweighted", + subtitle = "All transcripts equal weight", + fill_label = "Norm. Conc.") + +plot_field(nrm(res_wt), tc_A, + title = "6b. UMI-weighted", + subtitle = "High-UMI cluster source amplified", + palette = "plasma", + fill_label = "Norm. Conc.") + +plot_annotation( + title = "Section 6: UMI Weighting", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +**When to use UMI weighting:** When transcript counts meaningfully reflect expression level differences. Note that this amplifies both true signal and noise — noisy high-UMI outliers can dominate the field. Inspect your UMI distribution before applying weights. + +--- + +# Section 7 — External transcripts + +`include_external = TRUE` bins transcripts that fall outside the mask into the nearest inside-edge grid cell, allowing them to contribute to the field. This is useful when you have transcripts just outside the mask boundary (e.g. due to mask fitting inaccuracy) that you still want to include. + +```{r sec7, cache=TRUE} +tc_ext <- data.frame(x = rnorm(80, 6.5, 0.4), + y = rnorm(80, 0, 0.5), + umi = rpois(80, 10)) +ext_in <- as.logical(sf::st_within( + sf::st_as_sf(tc_ext, coords = c("x","y")), mu_t, sparse = FALSE)[, 1L]) +tc_ext <- tc_ext[!ext_in, ] # keep only those outside the mask +tc_Ax <- rbind(tc_A, tc_ext) + +res_ne <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_Ax, + D = 1, lambda = 0.15, method = "fd", fd_solver = "direct", + grid_resolution = 256L, include_external = FALSE, verbose = FALSE) + +res_we <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_Ax, + D = 1, lambda = 0.15, method = "fd", fd_solver = "direct", + grid_resolution = 256L, include_external = TRUE, verbose = FALSE) + +ext_pt_layer <- geom_point(data = tc_ext, aes(x = x, y = y), + color = "orange", size = 0.9, alpha = 0.8, + inherit.aes = FALSE) +``` + +```{r sec7-plot, fig.height=4} +(plot_field(res_ne, tc_A, title = "7a. include_external = FALSE") + + ext_pt_layer) + +(plot_field(res_we, tc_A, title = "7b. include_external = TRUE", + palette = "plasma") + + ext_pt_layer) + +plot_annotation( + title = "Section 7: External Transcripts (orange = outside mask)", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +--- + +# Section 8 — Holey mask: vascular clearance + +This section demonstrates a key strength of `method = "fd"`: it correctly handles holes (e.g. vessel lumens) within the tissue mask. The `fd` solver zeroes the concentration inside each hole, modelling vessels as clearance sinks. The `green` FFT method ignores topology entirely — it treats the holes as normal tissue. + +```{r sec8-mask, cache=TRUE} +vd <- list(list(cx = -2, cy = 1.5, r = 1.2), + list(cx = 2.5, cy = -2, r = 1.0), + list(cx = -4.5, cy = -2.5, r = 0.8)) +in_v <- function(x, y) Reduce(`|`, lapply(vd, function(v) + sqrt((x - v$cx)^2 + (y - v$cy)^2) < v$r)) + +n_h <- 3000 +cnd_h <- data.frame(x = runif(n_h * 8, -11, 11), + y = runif(n_h * 8, -9, 9)) +kp_h <- in_kidney(cnd_h$x, cnd_h$y) & !in_v(cnd_h$x, cnd_h$y) +pts_h <- cnd_h[kp_h, ][1:n_h, ] + +holey_mask <- fit_spatial_mask(pts_h, method = "raster", + raster_resolution = 256L, raster_sigma = 0.4, + raster_threshold = 0.2, verbose = FALSE) + +tc_h <- sample_in_mask(holey_mask, 400) +``` + +```{r sec8-solve, cache=TRUE} +res_hfd <- estimate_concentration_field( + mask = holey_mask, transcript_coords = tc_h, + D = 1, lambda = 0.15, method = "fd", fd_solver = "direct", + grid_resolution = 300L, verbose = FALSE) + +res_htight <- estimate_concentration_field( + mask = holey_mask, transcript_coords = tc_h, + D = 1, lambda = 1.0, method = "fd", fd_solver = "direct", + grid_resolution = 300L, verbose = FALSE) + +res_hgrn <- estimate_concentration_field( + mask = holey_mask, transcript_coords = tc_h, + D = 1, lambda = 0.15, method = "green", + grid_resolution = 300L, verbose = FALSE) + +res_hdiff <- res_hfd +res_hdiff$field <- res_hfd$field - res_hgrn$field +``` + +```{r sec8-plot, fig.height=8} +(plot_field(res_hfd, tc_h, + title = "8a. fd | L ≈ 2.58", + subtitle = "Vessel lumens correctly zeroed", + pt_size = 0.3) + + plot_field(res_htight, tc_h, + title = "8b. fd | L = 1 (tight)", + subtitle = "Steeper depletion near vessels", + palette = "inferno", pt_size = 0.3)) / +(plot_field(res_hgrn, tc_h, + title = "8c. green — topology blind", + subtitle = "FFT ignores holes entirely", + palette = "plasma", pt_size = 0.3) + + plot_field(res_hdiff, + title = "8d. fd − green", + subtitle = "fd zeros lumens; green does not", + symmetric = TRUE, show_contours = FALSE, + show_pts = FALSE)) + +plot_annotation( + title = "Section 8: Holey Mask — Vascular Clearance", + subtitle = "Use method='fd' whenever the mask has holes", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +> **Important:** If your tissue mask has holes (vessel lumens, necrotic cores), always use `method = "fd"`. The `green` and `kde` methods are unaware of domain topology. + +--- + +# Section 9 — Wide dynamic range and log-scale visualisation + +When transcript counts span several orders of magnitude (e.g. a dense focal cluster alongside sparse background), the linear concentration scale will be dominated by the peak and the background will appear flat. The `log1p` transform reveals structure at both ends of the dynamic range. + +```{r sec9, cache=TRUE} +tc_w <- rbind( + data.frame(x = rnorm(20, -5, 0.3), y = rnorm(20, 4, 0.3), umi = rpois(20, 200)), + data.frame(x = rnorm(300, 0, 4), y = rnorm(300, 0, 3), umi = rpois(300, 1))) +tc_w_in <- as.logical(sf::st_within( + sf::st_as_sf(tc_w[, c("x","y")], coords = c("x","y")), + mu_t, sparse = FALSE)[, 1L]) +tc_w <- tc_w[tc_w_in, ] + +res_w <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_w, + D = 1, lambda = 0.5, method = "fd", fd_solver = "direct", + grid_resolution = 256L, weight_col = "umi", verbose = FALSE) +``` + +```{r sec9-plot, fig.height=4} +plot_field(res_w, tc_w, + title = "9a. Linear scale", + subtitle = "Background invisible; cluster dominates", + pt_color = "white") + +plot_field(res_w, tc_w, + log_scale = TRUE, + title = "9b. log₁₊ scale", + subtitle = "Background and cluster both visible", + palette = "viridis", pt_color = "white") + +plot_annotation( + title = "Section 9: Wide Dynamic Range", + subtitle = "Use log_scale=TRUE when max/min UMI ratio > ~20", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +You can also apply the log transform inside the solver with `log_transform = TRUE`, which is equivalent but applies the transform before returning the field matrix. + +--- + +# Section 10 — Two-gene ratio map + +A **log₂(A/B) ratio map** shows where Gene A has higher concentration relative to Gene B, and vice versa. This is analogous to a log-fold-change map, but in space rather than between conditions. Blue regions favour B; yellow/red regions favour A; white is balanced. + +```{r sec10, cache=TRUE} +ph <- list(D = 1, lambda = 0.25, production_rate = 1, + method = "fd", fd_solver = "direct", + grid_resolution = 256L, verbose = FALSE) + +res_gA <- do.call(estimate_concentration_field, + c(list(mask = tissue_mask, transcript_coords = tc_A), ph)) +res_gB <- do.call(estimate_concentration_field, + c(list(mask = tissue_mask, transcript_coords = tc_B), ph)) + +res_rt <- res_gA +res_rt$field <- log2((res_gA$field + 1e-6) / (res_gB$field + 1e-6)) +``` + +```{r sec10-plot, fig.height=8} +p_ratio <- plot_field(res_rt, + title = "10c. log₂(A/B) ratio", + subtitle = "Red=A territory | Blue=B territory | White=balanced", + symmetric = TRUE, show_contours = TRUE, n_contours = 5, + show_pts = FALSE, fill_label = "log₂(A/B)") + + geom_point(data = tc_A, aes(x = x, y = y), + color = "#e74c3c", size = 0.3, alpha = 0.4, + inherit.aes = FALSE) + + geom_point(data = tc_B, aes(x = x, y = y), + color = "#27ae60", size = 0.3, alpha = 0.4, + inherit.aes = FALSE) + +p_dens <- ggplot(field_to_df(res_rt), aes(x = field, + fill = ifelse(field < 0, "B dominant", "A dominant"))) + + geom_density(alpha = 0.55) + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + + scale_fill_manual(values = c("B dominant" = "#2980b9", + "A dominant" = "#c0392b"), + name = NULL) + + labs(title = "10d. Distribution of log₂(A/B)", + subtitle = "Fraction of tissue in each territory", + x = "log₂(A/B)", y = "Density") + + theme_demo() + +(plot_field(res_gA, tc_A, title = "10a. Gene A field") + + plot_field(res_gB, tc_B, title = "10b. Gene B field", palette = "viridis")) / +(p_ratio + p_dens) + +plot_annotation( + title = "Section 10: Two-Gene Ratio Map", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +The `1e-6` pseudocount in the ratio calculation prevents division-by-zero and log(0) in regions where one gene has near-zero concentration. + +--- + +# Section 11 — 100k transcripts and solver timing + +This section benchmarks the three solvers on 100,000 transcripts at two grid resolutions. + +```{r sec11, cache=TRUE} +tc_100k <- sample_in_mask(tissue_mask, 100000) +cat(sprintf("Sampled %d transcripts.\n", nrow(tc_100k))) + +t1 <- system.time(r1 <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_100k, + D = 1, lambda = 0.3, method = "fd", fd_solver = "direct", + grid_resolution = 256L, verbose = TRUE)) + +t2 <- system.time(r2 <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_100k, + D = 1, lambda = 0.3, method = "fd", fd_solver = "iterative", + sor_omega = 1.75, grid_resolution = 512L, verbose = TRUE)) + +t3 <- system.time(r3 <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_100k, + D = 1, lambda = 0.3, method = "green", + grid_resolution = 512L, verbose = TRUE)) +``` + +```{r sec11-table} +data.frame( + Solver = c("fd/direct (N=256)", "fd/SOR (N=512)", "green/FFT (N=512)"), + Time_s = round(c(t1["elapsed"], t2["elapsed"], t3["elapsed"]), 2), + Has_BCs = c("Yes", "Yes", "No"), + Handles_holes = c("Yes", "Yes", "No"), + Notes = c("Sparse LU; recommended up to N=400", + "Iterative; use for N > 400", + "Fastest; infinite domain only") +) +``` + +```{r sec11-plot, fig.height=4} +sub <- tc_100k[sample(nrow(tc_100k), 2000), ] +plot_field(r1, sub, + title = sprintf("11a. fd/direct (%.1f s)", t1["elapsed"]), + subtitle = "N=256 | sparse LU", + pt_size = 0.15, pt_alpha = 0.2) + +plot_field(r2, sub, + title = sprintf("11b. fd/SOR (%.1f s)", t2["elapsed"]), + subtitle = "N=512 | iterative", + palette = "plasma", pt_size = 0.15, pt_alpha = 0.2) + +plot_field(r3, sub, + title = sprintf("11c. green/FFT (%.1f s)", t3["elapsed"]), + subtitle = "N=512 | FFT", + palette = "viridis", pt_size = 0.15, pt_alpha = 0.2) + +plot_annotation( + title = "Section 11: 100k Transcripts — Solver Comparison", + subtitle = "2,000 of 100,000 points shown", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +### Solver selection guide + +| Situation | Recommended solver | +|---|---| +| N ≤ 400, holey mask, or exact BCs needed | `fd`, `fd_solver = "direct"` | +| N > 400, high-res output needed | `fd`, `fd_solver = "iterative"` (SOR) | +| Fast sweep over L values, no BCs | `"green"` | +| Quick visual check | `"kde"` | + +--- + +# Section 12 — Quantitative field extraction + +Beyond visualisation, the field output can be queried numerically. Here we demonstrate the main extraction operations. + +```{r sec12} +res_q <- res_fd # reuse the Gene A result from Section 2 + +# ---- Bilinear interpolation at arbitrary query points ---- +interpolate_field <- function(result, query_pts) { + xg <- result$x; yg <- result$y; f <- result$field + vapply(seq_len(nrow(query_pts)), function(i) { + qx <- query_pts$x[i]; qy <- query_pts$y[i] + xi <- findInterval(qx, xg); yi <- findInterval(qy, yg) + if (xi < 1 || xi >= length(xg) || yi < 1 || yi >= length(yg)) + return(NA_real_) + wx <- (qx - xg[xi]) / (xg[xi+1] - xg[xi]) + wy <- (qy - yg[yi]) / (yg[yi+1] - yg[yi]) + (1-wx)*(1-wy)*f[xi,yi] + wx*(1-wy)*f[xi+1,yi] + + (1-wx)*wy *f[xi,yi+1] + wx*wy *f[xi+1,yi+1] + }, numeric(1)) +} +``` + +```{r sec12-queries} +queries <- data.frame( + label = c("Cluster core", "Cluster edge", "Tissue centre", + "Far right", "Near notch"), + x = c(-4.5, -3.0, 0.0, 5.0, 3.0), + y = c( 3.8, 2.5, 0.0, 0.0, 0.0)) +queries$concentration <- interpolate_field(res_q, queries) +queries +``` + +```{r sec12-stats} +df_q <- field_to_df(res_q) + +# 12b: Mean concentration by tissue half +cat(sprintf("Mean concentration:\n Left half (x < 0): %.5f\n Right half (x ≥ 0): %.5f\n", + mean(df_q$field[df_q$x < 0 & !is.na(df_q$field)]), + mean(df_q$field[df_q$x >= 0 & !is.na(df_q$field)]))) + +# 12c: Peak location +pk <- which.max(res_q$field) +N <- length(res_q$x) +cat(sprintf("\nPeak: x=%.2f, y=%.2f, value=%.6f\n", + res_q$x[((pk-1L) %% N) + 1L], + res_q$y[((pk-1L) %/% N) + 1L], + res_q$field[pk])) + +# 12d: Field integral +total <- sum(res_q$field[res_q$mask], na.rm = TRUE) * res_q$hx * res_q$hy +theory <- nrow(tc_A) * 1.0 / 0.3 # n * production_rate / lambda +cat(sprintf("\nField integral: %.4f\nTheory (n·r/λ): %.4f\nAgreement: %.1f%%\n", + total, theory, 100 * (1 - abs(total - theory) / theory))) +``` + +```{r sec12-profile, fig.height=3.5} +prof <- df_q[abs(df_q$y) < res_q$hy * 1.5 & !is.na(df_q$field), ] +ggplot(prof[order(prof$x), ], aes(x = x, y = field)) + + geom_line(color = "#e74c3c", linewidth = 0.8) + + geom_area(fill = "#e74c3c", alpha = 0.15) + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") + + labs(title = "12e. Concentration profile along y ≈ 0", + subtitle = "Cluster peak at x ≈ −4.5; near-zero at right edge", + x = "X", y = "Concentration") + + theme_demo() +``` + +--- + +# Parameter quick-reference + +``` +estimate_concentration_field() — parameter guide +───────────────────────────────────────────────────────────────────── + +PHYSICS + diffusion_length Primary tuning knob. Set directly: L = 0.5 to 10 + Rule: L ~ 10–20% of inter-cluster distance + D Affects amplitude only (amplitude ∝ 1/D) + lambda Affects amplitude only; lambda = D / L^2 + production_rate Global amplitude scaling; use weight_col for per-cell + +SOLVER + method = "fd" Default. Use whenever you need accurate BCs or holes. + method = "green" Faster for sweeps; ignores domain shape. + method = "kde" Quick baseline; no physics. + + fd_solver = "direct" N ≤ 400 (sparse LU decomposition) + fd_solver = "iterative" N > 400 (red-black SOR) + + boundary_condition = "dirichlet" C=0 at boundary. Default. Tissue sections. + boundary_condition = "neumann" dC/dn=0. Sealed system. + + grid_resolution = 256 Explore. 512 for publication quality. + +WEIGHTS + weight_col = "umi" Use UMI counts as source strength. + Careful: amplifies outliers. + +POST-PROCESSING + log_transform = TRUE Apply log1p() after solving. Useful for wide dynamic range. + normalize = TRUE Rescale to [0,1] for comparison across conditions. + clip_negative = TRUE Remove small numerical negatives. Default on. +───────────────────────────────────────────────────────────────────── +``` + +--- + +# Session information + +```{r session-info} +sessionInfo() +``` diff --git a/spatial-modeling-with-claude/vignette_concentration_field.html b/spatial-modeling-with-claude/vignette_concentration_field.html new file mode 100644 index 0000000..83e4d19 --- /dev/null +++ b/spatial-modeling-with-claude/vignette_concentration_field.html @@ -0,0 +1,2928 @@ + + + + + + + + + + + + + + + +Estimating Steady-State Molecular Concentration Fields + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + +
+
+

Overview

+

Spatial transcriptomics data gives us discrete transcript or cell +locations, but many biological processes — ligand gradients, cytokine +fields, metabolite concentrations — are continuous. +estimate_concentration_field() bridges this gap by +computing the steady-state concentration field that a +set of point sources (mRNA transcripts, detected proteins) would produce +under a physically motivated diffusion-clearance model.

+

The result is a dense concentration map over the tissue, interpolated +from point source locations. This can be used to:

+
    +
  • Identify where a signalling molecule is most concentrated
  • +
  • Compute the concentration seen by each cell
  • +
  • Create two-gene ratio maps (spatial domains of ligand/receptor +balance)
  • +
  • Visualise how diffusion length affects signal range
  • +
+
+
+
+

The physical model

+

The function solves the screened Poisson equation +(also called the steady-state diffusion-clearance PDE):

+

\[D \nabla^2 C(\mathbf{x}) - \lambda +C(\mathbf{x}) + s(\mathbf{x}) = 0\]

+

where:

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SymbolMeaningParameter
\(C(\mathbf{x})\)Steady-state concentration at position x— (what we solve for)
\(D\)Diffusion coefficient (spatial spread rate)D
\(\lambda\)First-order clearance ratelambda
\(s(\mathbf{x})\)Source density (production rate per unit area)production_rate
+

The key derived quantity is the diffusion +length:

+

\[L = \sqrt{D / \lambda}\]

+

\(L\) sets the characteristic +distance over which concentration decays from a point source. It is the +single most important parameter to tune. The shape of the field depends +only on \(L\); the amplitude scales as +\(1/D\).

+
+

Practical interpretation: \(L\) is roughly the radius of influence of a +single transcript. At distance \(r \gg +L\) from a source, the concentration is negligible. At \(r \ll L\), concentration is near its +maximum.

+
+
+
+
+

Setup

+
library(sf)
+library(ggplot2)
+library(patchwork)
+source("fit_spatial_mask.R")
+source("estimate_concentration_field.R")
+
+set.seed(2024)
+
+
+
+

Shared helpers

+
# ---- Plot theme ----
+theme_demo <- function(bs = 9.5)
+  theme_minimal(base_size = bs) +
+  theme(plot.title    = element_text(face = "bold", size = bs + 0.5),
+        plot.subtitle = element_text(size = bs - 1, color = "grey40",
+                                     margin = margin(b = 4)),
+        panel.grid    = element_line(color = "grey94"),
+        legend.key.width  = unit(0.35, "cm"),
+        legend.title      = element_text(size = bs - 1),
+        legend.text       = element_text(size = bs - 1.5),
+        axis.text         = element_text(size = bs - 2))
+
+# ---- Field visualiser ----
+# Uses geom_raster (correct for continuous fields on regular grids).
+# show_contours overlays iso-concentration lines.
+plot_field <- function(result, transcripts = NULL,
+                       title = "", subtitle = "",
+                       palette = "magma", log_scale = FALSE,
+                       show_pts = TRUE, show_contours = TRUE, n_contours = 6L,
+                       pt_size = 0.35, pt_alpha = 0.45, pt_color = "#00e5ff",
+                       fill_label = "Conc.", symmetric = FALSE) {
+  df       <- field_to_df(result, inside_only = TRUE)
+  fill_col <- if (log_scale) { df$fv <- log1p(df$field); "fv" } else "field"
+  fill_lbl <- if (log_scale) paste0("log\u2081\u208a(", fill_label, ")") else fill_label
+  p <- ggplot(df, aes(x = x, y = y, fill = .data[[fill_col]])) +
+    geom_raster(interpolate = TRUE) + coord_equal() +
+    labs(title = title, subtitle = subtitle, x = "X", y = "Y") +
+    theme_demo()
+  if (symmetric) {
+    lim <- max(abs(df[[fill_col]]), na.rm = TRUE)
+    p   <- p + scale_fill_distiller(palette = "RdBu", limits = c(-lim, lim),
+                                     name = fill_lbl, na.value = "transparent")
+  } else {
+    p <- p + scale_fill_viridis_c(option = palette, name = fill_lbl,
+                                   na.value = "transparent")
+  }
+  if (show_contours && any(!is.na(df$field)))
+    p <- p + geom_contour(aes(z = field), color = "white",
+                          alpha = 0.35, linewidth = 0.25, bins = n_contours)
+  if (show_pts && !is.null(transcripts))
+    p <- p + geom_point(data = transcripts, aes(x = x, y = y),
+                        inherit.aes = FALSE, color = pt_color,
+                        size = pt_size, alpha = pt_alpha)
+  p
+}
+
+# ---- Sample points uniformly from inside a mask ----
+sample_in_mask <- function(mask_poly, n, max_tries = 20) {
+  bb  <- sf::st_bbox(mask_poly); mu <- sf::st_union(mask_poly)
+  pts <- data.frame(x = numeric(0), y = numeric(0))
+  for (i in seq_len(max_tries)) {
+    if (nrow(pts) >= n) break
+    cnd <- data.frame(x = runif(n * 8, bb["xmin"], bb["xmax"]),
+                      y = runif(n * 8, bb["ymin"], bb["ymax"]))
+    ok  <- as.logical(sf::st_within(
+      sf::st_as_sf(cnd, coords = c("x","y")), mu, sparse = FALSE)[, 1L])
+    pts <- rbind(pts, cnd[ok, ])
+  }
+  pts[seq_len(min(n, nrow(pts))), ]
+}
+
+
+
+

Function reference: estimate_concentration_field()

+
estimate_concentration_field(
+  mask,               # sfc polygon — output of fit_spatial_mask()
+  transcript_coords,  # data.frame with columns x and y (plus optional weight_col)
+
+  # Physics
+  D                  = 1.0,      # diffusion coefficient (arbitrary units)
+  lambda             = 0.1,      # first-order clearance rate
+  diffusion_length   = NULL,     # override: set L directly; NULL uses sqrt(D/lambda)
+  production_rate    = 1.0,      # multiplied by weights if weight_col is set
+
+  # Solver
+  method             = "fd",     # "fd" | "green" | "kde"
+  grid_resolution    = 256L,     # grid cells per axis (N × N)
+  boundary_condition = "dirichlet",  # "dirichlet" | "neumann"  (fd only)
+  fd_solver          = "direct", # "direct" | "iterative"  (fd only)
+
+  # Weighting
+  weight_col         = NULL,     # column name for per-transcript weights (e.g. "umi")
+
+  # Optional
+  include_external   = FALSE,    # include transcripts outside mask?
+  normalize          = FALSE,    # rescale field to [0, 1]?
+  log_transform      = FALSE,    # apply log1p() after solving?
+  clip_negative      = TRUE,     # set small negatives to 0?
+  n_cores            = 1L,
+  plot               = FALSE,
+  verbose            = TRUE
+)
+
+

Return value

+

A named list containing:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ElementContents
fieldN×N matrix; NA outside the mask
x, yGrid centre coordinates (length N each)
hx, hyGrid cell width and height
maskN×N logical matrix (TRUE = inside mask)
sourcesN×N matrix of binned source density
paramsList of all input parameters
diagnosticsTiming, counts, etc.
+

Use field_to_df(result) to convert to a tidy data frame +for ggplot.

+
+
+
+
+

Section 1 — Synthetic tissue and transcript layout

+

We build a kidney-bean-shaped tissue with two synthetic genes:

+
    +
  • Gene A: focal cluster at upper-left, plus sparse +background. High UMI count (focal source).
  • +
  • Gene B: 500 transcripts with a right-biased spatial +distribution. Low UMI count (diffuse source).
  • +
+
in_kidney <- function(x, y, oa = 9, ob = 7, ncx = 5, nr = 3.8)
+  (x/oa)^2 + (y/ob)^2 < 1 & sqrt((x - ncx)^2 + y^2) >= nr
+
+cands      <- data.frame(x = runif(18000, -11, 11), y = runif(18000, -9, 9))
+tissue_pts <- cands[in_kidney(cands$x, cands$y), ][1:3000, ]
+
+tissue_mask <- fit_spatial_mask(tissue_pts, method = "raster",
+  raster_resolution = 256L, raster_sigma = 0.7, raster_threshold = 0.15,
+  verbose = FALSE)
+
+mu_t <- sf::st_union(tissue_mask)
+
# Gene A: focal cluster at (-4.5, 3.8) + sparse background
+tc_A_cl <- data.frame(x = rnorm(350, -4.5, 1.2), y = rnorm(350, 3.8, 0.9))
+A_in    <- as.logical(sf::st_within(
+  sf::st_as_sf(tc_A_cl, coords = c("x","y")), mu_t, sparse = FALSE)[, 1L])
+tc_A_cl <- tc_A_cl[A_in, ]
+tc_A_sc <- sample_in_mask(tissue_mask, 50)
+tc_A    <- rbind(tc_A_cl, tc_A_sc)
+tc_A$umi <- c(rpois(nrow(tc_A_cl), 15), rpois(nrow(tc_A_sc), 2))
+
+# Gene B: right-biased random distribution
+tc_B_all <- sample_in_mask(tissue_mask, 2000)
+prob_B   <- plogis(tc_B_all$x / 3)
+tc_B     <- tc_B_all[runif(nrow(tc_B_all)) < prob_B, ][1:500, ]
+tc_B$umi <- rpois(nrow(tc_B), 5)
+
tissue_sf <- sf::st_sf(geometry = tissue_mask)
+
+ggplot() +
+  geom_sf(data = tissue_sf, fill = "grey92", color = "grey60", linewidth = 0.4) +
+  geom_point(data = tc_A, aes(x = x, y = y, color = "Gene A", size = log1p(umi)),
+             alpha = 0.6) +
+  geom_point(data = tc_B, aes(x = x, y = y, color = "Gene B", size = log1p(umi)),
+             alpha = 0.5) +
+  scale_color_manual(values = c("Gene A" = "#e74c3c", "Gene B" = "#2980b9"),
+                     name = NULL) +
+  scale_size_continuous(range = c(0.3, 2.5), name = "log(UMI+1)") +
+  coord_sf(datum = NA) + theme_demo() +
+  labs(title    = "Section 1: Synthetic kidney-bean tissue",
+       subtitle = "Gene A: focal upper-left | Gene B: broad right-biased",
+       x = "X", y = "Y")
+

+
+
+
+

Section 2 — Method comparison: fd, green, kde

+

Three solvers are available. Each makes different assumptions about +the domain:

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodHow it worksBoundary handlingBest for
"fd"Sparse finite-difference linear systemExact Dirichlet or Neumann BCsDefault; recommended
"green"FFT convolution with analytic Green’s function \(K_0(\kappa r)\)None — infinite domainFast interior estimates; quick sweeps
"kde"Gaussian kernel smoothingNonePhenomenological baseline
+
pb <- list(D = 1, lambda = 0.3, production_rate = 1,
+           grid_resolution = 256L, verbose = FALSE)
+
+res_fd    <- do.call(estimate_concentration_field,
+  c(list(mask = tissue_mask, transcript_coords = tc_A,
+         method = "fd", fd_solver = "direct",
+         boundary_condition = "dirichlet"), pb))
+
+res_green <- do.call(estimate_concentration_field,
+  c(list(mask = tissue_mask, transcript_coords = tc_A,
+         method = "green"), pb))
+
+res_kde   <- do.call(estimate_concentration_field,
+  c(list(mask = tissue_mask, transcript_coords = tc_A,
+         method = "kde", kde_bandwidth = sqrt(1/0.3)), pb))
+
+res_diff  <- res_fd
+res_diff$field <- res_fd$field - res_green$field
+
(plot_field(res_fd,    tc_A, title = "2a. fd (Dirichlet)",
+            subtitle = "Exact BCs; recommended") +
+ plot_field(res_green, tc_A, title = "2b. green (FFT)",
+            subtitle = "Infinite domain; no boundary effects")) /
+(plot_field(res_kde,   tc_A, title = "2c. kde",
+            subtitle = "Bandwidth = L; phenomenological") +
+ plot_field(res_diff, title = "2d. fd − green",
+            subtitle = "fd correctly suppresses near-edge concentration",
+            symmetric = TRUE, show_contours = FALSE, show_pts = FALSE)) +
+plot_annotation(
+  title = "Section 2: Solver Comparison (Gene A, D=1, λ=0.3, L≈1.83)",
+  theme = theme(plot.title = element_text(face = "bold", size = 13)))
+

+

Key observations:

+
    +
  • The fd and green maps look very similar in +the interior (panel 2d shows only small differences away from the +edges). This confirms that the physics is consistent between +methods.
  • +
  • The difference map (2d) shows that fd +correctly drives concentration to zero at the tissue boundary (Dirichlet +BC), while green ignores the boundary entirely. This +matters most when sources are near the edge.
  • +
  • kde produces a visually similar pattern to +fd because the bandwidth was set equal to \(L\), but it has no physical interpretation +and no BCs.
  • +
+

When to use method = "green": When you +are doing a rapid parameter sweep over many \(L\) values and don’t need exact boundary +conditions. It runs faster than fd at high resolution.

+
+
+
+

Section 3 — Diffusion length sweep: the most important +parameter

+

The diffusion length \(L = +\sqrt{D/\lambda}\) is the primary parameter to tune. It +determines how far each transcript’s signal spreads. Small \(L\): fields are tight and peaked around +clusters. Large \(L\): fields fill the +entire tissue.

+
L_vals <- c(0.5, 1.5, 4.0, 10.0)
+
+sw3 <- lapply(L_vals, function(Lv) {
+  res <- estimate_concentration_field(
+    mask = tissue_mask, transcript_coords = tc_A,
+    D = 1, diffusion_length = Lv, production_rate = 1,
+    method = "fd", fd_solver = "direct", grid_resolution = 256L,
+    verbose = FALSE)
+  # Normalise each panel independently for visual comparison
+  f <- res$field
+  res$field <- (f - min(f, na.rm = TRUE)) / diff(range(f, na.rm = TRUE))
+  plot_field(res, tc_A,
+             title      = sprintf("L = %.1f", Lv),
+             subtitle   = sprintf("λ ≈ %.3f", 1/Lv^2),
+             fill_label = "Norm. Conc.", pt_size = 0.4) +
+  scale_fill_viridis_c(option = "magma", name = "Norm.\nConc.",
+                        limits = c(0, 1), na.value = "transparent")
+})
+
wrap_plots(sw3, nrow = 1) +
+  plot_annotation(
+    title    = "Section 3: Diffusion Length Sweep (each panel normalised independently)",
+    subtitle = "Small L → tight peaks | Large L → tissue-wide field",
+    theme    = theme(plot.title    = element_text(face = "bold", size = 13),
+                     plot.subtitle = element_text(size = 9, color = "grey40")))
+

+
peaks <- sapply(L_vals, function(Lv) {
+  res <- estimate_concentration_field(
+    mask = tissue_mask, transcript_coords = tc_A,
+    D = 1, diffusion_length = Lv,
+    method = "fd", fd_solver = "direct", grid_resolution = 256L,
+    verbose = FALSE)
+  max(res$field, na.rm = TRUE)
+})
+data.frame(L = L_vals, peak_concentration = round(peaks, 5))
+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Lpeak_concentration
0.510.96477
1.539.55711
4.078.01534
10.098.86167
+
+
+

How to choose L in practice

+

There is no single correct answer — the right \(L\) is biologically motivated. Ask:

+
    +
  1. What is the typical inter-cluster distance? \(L\) should be 10–20% of this distance if +you want fields that peak at sources and drop between them. Larger +values produce smoother, more “territorial” fields.
  2. +
  3. Are you modelling a small diffusing molecule or a large +one? Small molecules (e.g. metabolites, small cytokines) have +higher \(D\) and thus larger \(L\). Large molecules (e.g. ECM-bound +ligands) have small \(D\).
  4. +
  5. Does your field look biologically reasonable? +Inspect the output and ask whether the gradients match known +biology.
  6. +
+
+

Shortcut: set diffusion_length = L +directly to bypass the D/lambda decomposition. +The shape depends only on \(L\); \(D\) only affects amplitude.

+
+
+
+
+
+

Section 4 — Fixed L, varying D and lambda

+

This section makes the key mathematical point: field shape is +determined entirely by \(L\). +Changing \(D\) and \(\lambda\) while keeping \(L = \sqrt{D/\lambda}\) constant produces +identical spatial patterns, only scaling the amplitude by \(1/D\).

+
DL_cases <- list(
+  list(D = 0.1, lam = 0.025, label = "D=0.1, λ=0.025"),
+  list(D = 1,   lam = 0.25,  label = "D=1,   λ=0.25"),
+  list(D = 5,   lam = 1.25,  label = "D=5,   λ=1.25"),
+  list(D = 20,  lam = 5,     label = "D=20,  λ=5")
+)
+
+dl_pl <- lapply(DL_cases, function(co) {
+  res <- estimate_concentration_field(
+    mask = tissue_mask, transcript_coords = tc_A,
+    D = co$D, lambda = co$lam, production_rate = 1,
+    method = "fd", fd_solver = "direct", grid_resolution = 256L,
+    verbose = FALSE)
+  pk <- round(max(res$field, na.rm = TRUE), 5)
+  plot_field(res, tc_A,
+             title    = co$label,
+             subtitle = sprintf("L=2 fixed | peak=%.5f", pk),
+             pt_size  = 0.3)
+})
+
wrap_plots(dl_pl, nrow = 1) +
+  plot_annotation(
+    title    = "Section 4: Fixed L=2, Varying D and λ",
+    subtitle = "Shape IDENTICAL (same L). Amplitude ∝ 1/D.",
+    theme    = theme(plot.title    = element_text(face = "bold", size = 13),
+                     plot.subtitle = element_text(size = 9, color = "grey40")))
+

+

Implication for parameter setting: When fitting to +data, first determine the spatial extent of influence (\(L\)), then fix either \(D\) or \(\lambda\) based on biophysical priors. The +third parameter is then determined.

+
+
+
+

Section 5 — Boundary conditions

+

The fd method supports two boundary conditions:

+
    +
  • Dirichlet +(boundary_condition = "dirichlet"): \(C = 0\) at the tissue boundary. Physically: +molecules are absorbed or degrade instantly at the boundary. This is +appropriate for most spatial transcriptomics sections where the tissue +edge is a hard barrier.
  • +
  • Neumann +(boundary_condition = "neumann"): \(\partial C / \partial n = 0\) at the +boundary (zero normal flux). Physically: no molecules cross the boundary +— they are reflected back in. This is appropriate when modelling a +sealed container.
  • +
+
res_dir <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_A,
+  D = 1, lambda = 0.2, method = "fd", fd_solver = "direct",
+  boundary_condition = "dirichlet", grid_resolution = 256L, verbose = FALSE)
+
+res_neu <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_A,
+  D = 1, lambda = 0.2, method = "fd", fd_solver = "direct",
+  boundary_condition = "neumann", grid_resolution = 256L, verbose = FALSE)
+
+res_bcd <- res_dir
+res_bcd$field <- res_neu$field - res_dir$field
+
sl <- function(res, lbl) {
+  df <- field_to_df(res)
+  s  <- df[abs(df$y) < res$hy * 1.5 & !is.na(df$field), ]
+  s  <- s[order(s$x), ]; s$BC <- lbl; s
+}
+sl_all <- rbind(sl(res_dir, "Dirichlet"), sl(res_neu, "Neumann"))
+
+p5d <- ggplot(sl_all, aes(x = x, y = field, color = BC)) +
+  geom_line(linewidth = 0.8) +
+  scale_color_manual(values = c("Dirichlet" = "#e74c3c",
+                                 "Neumann"   = "#2980b9")) +
+  labs(title    = "5d. Cross-section at y ≈ 0",
+       subtitle = "Dirichlet drops to 0; Neumann stays elevated",
+       x = "X", y = "Conc.", color = "BC") +
+  theme_demo()
+
(plot_field(res_dir, tc_A,
+            title    = "5a. Dirichlet (C=0 at boundary)",
+            subtitle = "D=1, λ=0.2, L≈2.24") +
+ plot_field(res_neu, tc_A,
+            title    = "5b. Neumann (∂C/∂n=0)",
+            subtitle = "Molecules reflected; higher near edges",
+            palette  = "plasma")) /
+(plot_field(res_bcd,
+            title    = "5c. Neumann − Dirichlet",
+            subtitle = "Neumann retains more concentration near boundary",
+            symmetric = TRUE, show_contours = FALSE, show_pts = FALSE) +
+ p5d) +
+plot_annotation(
+  title = "Section 5: Boundary Condition Comparison",
+  theme = theme(plot.title = element_text(face = "bold", size = 13)))
+

+

Recommendation: Use "dirichlet" for +tissue sections. Neumann BCs are mainly useful when comparing to +analytic solutions or modelling sealed compartments.

+
+
+
+

Section 6 — UMI weighting vs. equal weighting

+

Each transcript can carry a weight (e.g. its UMI count) via the +weight_col argument. This means a cell with 50 detected +transcripts of a gene contributes 50× more source density than a cell +with 1.

+
res_uw <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_A,
+  D = 1, lambda = 0.3, method = "fd", fd_solver = "direct",
+  grid_resolution = 256L, weight_col = NULL, verbose = FALSE)
+
+res_wt <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_A,
+  D = 1, lambda = 0.3, method = "fd", fd_solver = "direct",
+  grid_resolution = 256L, weight_col = "umi", verbose = FALSE)
+
+# Normalise both for visual comparison
+nrm <- function(r) {
+  f <- r$field
+  r$field <- (f - min(f, na.rm=TRUE)) / diff(range(f, na.rm=TRUE))
+  r
+}
+
plot_field(nrm(res_uw), tc_A,
+           title      = "6a. Unweighted",
+           subtitle   = "All transcripts equal weight",
+           fill_label = "Norm. Conc.") +
+plot_field(nrm(res_wt), tc_A,
+           title      = "6b. UMI-weighted",
+           subtitle   = "High-UMI cluster source amplified",
+           palette    = "plasma",
+           fill_label = "Norm. Conc.") +
+plot_annotation(
+  title = "Section 6: UMI Weighting",
+  theme = theme(plot.title = element_text(face = "bold", size = 13)))
+

+

When to use UMI weighting: When transcript counts +meaningfully reflect expression level differences. Note that this +amplifies both true signal and noise — noisy high-UMI outliers can +dominate the field. Inspect your UMI distribution before applying +weights.

+
+
+
+

Section 7 — External transcripts

+

include_external = TRUE bins transcripts that fall +outside the mask into the nearest inside-edge grid cell, allowing them +to contribute to the field. This is useful when you have transcripts +just outside the mask boundary (e.g. due to mask fitting inaccuracy) +that you still want to include.

+
tc_ext <- data.frame(x = rnorm(80, 6.5, 0.4),
+                      y = rnorm(80, 0, 0.5),
+                      umi = rpois(80, 10))
+ext_in <- as.logical(sf::st_within(
+  sf::st_as_sf(tc_ext, coords = c("x","y")), mu_t, sparse = FALSE)[, 1L])
+tc_ext  <- tc_ext[!ext_in, ]   # keep only those outside the mask
+tc_Ax   <- rbind(tc_A, tc_ext)
+
+res_ne <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_Ax,
+  D = 1, lambda = 0.15, method = "fd", fd_solver = "direct",
+  grid_resolution = 256L, include_external = FALSE, verbose = FALSE)
+
+res_we <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_Ax,
+  D = 1, lambda = 0.15, method = "fd", fd_solver = "direct",
+  grid_resolution = 256L, include_external = TRUE, verbose = FALSE)
+
+ext_pt_layer <- geom_point(data = tc_ext, aes(x = x, y = y),
+                             color = "orange", size = 0.9, alpha = 0.8,
+                             inherit.aes = FALSE)
+
(plot_field(res_ne, tc_A, title = "7a. include_external = FALSE") +
+ ext_pt_layer) +
+(plot_field(res_we, tc_A, title = "7b. include_external = TRUE",
+            palette = "plasma") +
+ ext_pt_layer) +
+plot_annotation(
+  title = "Section 7: External Transcripts (orange = outside mask)",
+  theme = theme(plot.title = element_text(face = "bold", size = 13)))
+

+
+
+
+

Section 8 — Holey mask: vascular clearance

+

This section demonstrates a key strength of +method = "fd": it correctly handles holes (e.g. vessel +lumens) within the tissue mask. The fd solver zeroes the +concentration inside each hole, modelling vessels as clearance sinks. +The green FFT method ignores topology entirely — it treats +the holes as normal tissue.

+
vd    <- list(list(cx = -2,  cy =  1.5, r = 1.2),
+               list(cx =  2.5, cy = -2,   r = 1.0),
+               list(cx = -4.5, cy = -2.5, r = 0.8))
+in_v  <- function(x, y) Reduce(`|`, lapply(vd, function(v)
+  sqrt((x - v$cx)^2 + (y - v$cy)^2) < v$r))
+
+n_h   <- 3000
+cnd_h <- data.frame(x = runif(n_h * 8, -11, 11),
+                     y = runif(n_h * 8, -9,   9))
+kp_h  <- in_kidney(cnd_h$x, cnd_h$y) & !in_v(cnd_h$x, cnd_h$y)
+pts_h <- cnd_h[kp_h, ][1:n_h, ]
+
+holey_mask <- fit_spatial_mask(pts_h, method = "raster",
+  raster_resolution = 256L, raster_sigma = 0.4,
+  raster_threshold  = 0.2,  verbose = FALSE)
+
+tc_h <- sample_in_mask(holey_mask, 400)
+
res_hfd    <- estimate_concentration_field(
+  mask = holey_mask, transcript_coords = tc_h,
+  D = 1, lambda = 0.15, method = "fd", fd_solver = "direct",
+  grid_resolution = 300L, verbose = FALSE)
+
+res_htight <- estimate_concentration_field(
+  mask = holey_mask, transcript_coords = tc_h,
+  D = 1, lambda = 1.0, method = "fd", fd_solver = "direct",
+  grid_resolution = 300L, verbose = FALSE)
+
+res_hgrn   <- estimate_concentration_field(
+  mask = holey_mask, transcript_coords = tc_h,
+  D = 1, lambda = 0.15, method = "green",
+  grid_resolution = 300L, verbose = FALSE)
+
+res_hdiff  <- res_hfd
+res_hdiff$field <- res_hfd$field - res_hgrn$field
+
(plot_field(res_hfd,    tc_h,
+            title    = "8a. fd | L ≈ 2.58",
+            subtitle = "Vessel lumens correctly zeroed",
+            pt_size  = 0.3) +
+ plot_field(res_htight, tc_h,
+            title    = "8b. fd | L = 1 (tight)",
+            subtitle = "Steeper depletion near vessels",
+            palette  = "inferno", pt_size = 0.3)) /
+(plot_field(res_hgrn,  tc_h,
+            title    = "8c. green — topology blind",
+            subtitle = "FFT ignores holes entirely",
+            palette  = "plasma", pt_size = 0.3) +
+ plot_field(res_hdiff,
+            title    = "8d. fd − green",
+            subtitle = "fd zeros lumens; green does not",
+            symmetric = TRUE, show_contours = FALSE,
+            show_pts  = FALSE)) +
+plot_annotation(
+  title    = "Section 8: Holey Mask — Vascular Clearance",
+  subtitle = "Use method='fd' whenever the mask has holes",
+  theme    = theme(plot.title    = element_text(face = "bold", size = 13),
+                   plot.subtitle = element_text(size = 9, color = "grey40")))
+

+
+

Important: If your tissue mask has holes (vessel +lumens, necrotic cores), always use method = "fd". The +green and kde methods are unaware of domain +topology.

+
+
+
+
+

Section 9 — Wide dynamic range and log-scale visualisation

+

When transcript counts span several orders of magnitude (e.g. a dense +focal cluster alongside sparse background), the linear concentration +scale will be dominated by the peak and the background will appear flat. +The log1p transform reveals structure at both ends of the +dynamic range.

+
tc_w <- rbind(
+  data.frame(x = rnorm(20, -5, 0.3),  y = rnorm(20, 4, 0.3),  umi = rpois(20, 200)),
+  data.frame(x = rnorm(300,  0, 4),   y = rnorm(300, 0, 3),   umi = rpois(300, 1)))
+tc_w_in <- as.logical(sf::st_within(
+  sf::st_as_sf(tc_w[, c("x","y")], coords = c("x","y")),
+  mu_t, sparse = FALSE)[, 1L])
+tc_w <- tc_w[tc_w_in, ]
+
+res_w <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_w,
+  D = 1, lambda = 0.5, method = "fd", fd_solver = "direct",
+  grid_resolution = 256L, weight_col = "umi", verbose = FALSE)
+
plot_field(res_w, tc_w,
+           title    = "9a. Linear scale",
+           subtitle = "Background invisible; cluster dominates",
+           pt_color = "white") +
+plot_field(res_w, tc_w,
+           log_scale = TRUE,
+           title    = "9b. log₁₊ scale",
+           subtitle = "Background and cluster both visible",
+           palette  = "viridis", pt_color = "white") +
+plot_annotation(
+  title    = "Section 9: Wide Dynamic Range",
+  subtitle = "Use log_scale=TRUE when max/min UMI ratio > ~20",
+  theme    = theme(plot.title    = element_text(face = "bold", size = 13),
+                   plot.subtitle = element_text(size = 9, color = "grey40")))
+

+

You can also apply the log transform inside the solver with +log_transform = TRUE, which is equivalent but applies the +transform before returning the field matrix.

+
+
+
+

Section 10 — Two-gene ratio map

+

A log₂(A/B) ratio map shows where Gene A has higher +concentration relative to Gene B, and vice versa. This is analogous to a +log-fold-change map, but in space rather than between conditions. Blue +regions favour B; yellow/red regions favour A; white is balanced.

+
ph <- list(D = 1, lambda = 0.25, production_rate = 1,
+           method = "fd", fd_solver = "direct",
+           grid_resolution = 256L, verbose = FALSE)
+
+res_gA <- do.call(estimate_concentration_field,
+  c(list(mask = tissue_mask, transcript_coords = tc_A), ph))
+res_gB <- do.call(estimate_concentration_field,
+  c(list(mask = tissue_mask, transcript_coords = tc_B), ph))
+
+res_rt        <- res_gA
+res_rt$field  <- log2((res_gA$field + 1e-6) / (res_gB$field + 1e-6))
+
p_ratio <- plot_field(res_rt,
+  title    = "10c. log₂(A/B) ratio",
+  subtitle = "Red=A territory | Blue=B territory | White=balanced",
+  symmetric = TRUE, show_contours = TRUE, n_contours = 5,
+  show_pts = FALSE, fill_label = "log₂(A/B)") +
+  geom_point(data = tc_A, aes(x = x, y = y),
+             color = "#e74c3c", size = 0.3, alpha = 0.4,
+             inherit.aes = FALSE) +
+  geom_point(data = tc_B, aes(x = x, y = y),
+             color = "#27ae60", size = 0.3, alpha = 0.4,
+             inherit.aes = FALSE)
+
+p_dens <- ggplot(field_to_df(res_rt), aes(x = field,
+    fill = ifelse(field < 0, "B dominant", "A dominant"))) +
+  geom_density(alpha = 0.55) +
+  geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") +
+  scale_fill_manual(values = c("B dominant" = "#2980b9",
+                                "A dominant" = "#c0392b"),
+                    name = NULL) +
+  labs(title    = "10d. Distribution of log₂(A/B)",
+       subtitle = "Fraction of tissue in each territory",
+       x = "log₂(A/B)", y = "Density") +
+  theme_demo()
+
+(plot_field(res_gA, tc_A, title = "10a. Gene A field") +
+ plot_field(res_gB, tc_B, title = "10b. Gene B field", palette = "viridis")) /
+(p_ratio + p_dens) +
+plot_annotation(
+  title = "Section 10: Two-Gene Ratio Map",
+  theme = theme(plot.title = element_text(face = "bold", size = 13)))
+

+

The 1e-6 pseudocount in the ratio calculation prevents +division-by-zero and log(0) in regions where one gene has near-zero +concentration.

+
+
+
+

Section 11 — 100k transcripts and solver timing

+

This section benchmarks the three solvers on 100,000 transcripts at +two grid resolutions.

+
tc_100k <- sample_in_mask(tissue_mask, 100000)
+cat(sprintf("Sampled %d transcripts.\n", nrow(tc_100k)))
+
## Sampled 100000 transcripts.
+
t1 <- system.time(r1 <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_100k,
+  D = 1, lambda = 0.3, method = "fd", fd_solver = "direct",
+  grid_resolution = 256L, verbose = TRUE))
+
## ============================================================
+##   estimate_concentration_field
+## ============================================================
+##   Method: fd | D: 1 | lambda: 0.3 | L: 1.82574 | N: 256 x 256
+##   BC: dirichlet | solver: direct
+## ------------------------------------------------------------
+##   Transcripts: 100000 used (0 external excluded)
+##   Rasterizing 256x256 grid...
+##   Inside-mask cells: 54302 (82.9%)
+##   Sparse system: 54302x54302, 270424 nnz
+##   Solve: 0.70s | Total: 1.32s | Range: [16.38, 986.3]
+## ============================================================
+
t2 <- system.time(r2 <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_100k,
+  D = 1, lambda = 0.3, method = "fd", fd_solver = "iterative",
+  sor_omega = 1.75, grid_resolution = 512L, verbose = TRUE))
+
## ============================================================
+##   estimate_concentration_field
+## ============================================================
+##   Method: fd | D: 1 | lambda: 0.3 | L: 1.82574 | N: 512 x 512
+##   BC: dirichlet | solver: iterative
+## ------------------------------------------------------------
+##   Transcripts: 100000 used (0 external excluded)
+##   Rasterizing 512x512 grid...
+##   Inside-mask cells: 217162 (82.8%)
+
##   Solve: 32.44s | Total: 33.85s | Range: [7.691, 916.7]
+## ============================================================
+
t3 <- system.time(r3 <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_100k,
+  D = 1, lambda = 0.3, method = "green",
+  grid_resolution = 512L, verbose = TRUE))
+
## ============================================================
+##   estimate_concentration_field
+## ============================================================
+##   Method: green | D: 1 | lambda: 0.3 | L: 1.82574 | N: 512 x 512
+## ------------------------------------------------------------
+##   Transcripts: 100000 used (0 external excluded)
+##   Rasterizing 512x512 grid...
+##   Inside-mask cells: 217162 (82.8%)
+##   Solve: 0.36s | Total: 1.73s | Range: [399.2, 1016]
+## ============================================================
+
data.frame(
+  Solver          = c("fd/direct (N=256)", "fd/SOR (N=512)", "green/FFT (N=512)"),
+  Time_s          = round(c(t1["elapsed"], t2["elapsed"], t3["elapsed"]), 2),
+  Has_BCs         = c("Yes", "Yes", "No"),
+  Handles_holes   = c("Yes", "Yes", "No"),
+  Notes           = c("Sparse LU; recommended up to N=400",
+                       "Iterative; use for N > 400",
+                       "Fastest; infinite domain only")
+)
+
+ +++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SolverTime_sHas_BCsHandles_holesNotes
fd/direct (N=256)1.32YesYesSparse LU; recommended up to N=400
fd/SOR (N=512)33.85YesYesIterative; use for N > 400
green/FFT (N=512)1.73NoNoFastest; infinite domain only
+
+
sub <- tc_100k[sample(nrow(tc_100k), 2000), ]
+plot_field(r1, sub,
+           title    = sprintf("11a. fd/direct (%.1f s)", t1["elapsed"]),
+           subtitle = "N=256 | sparse LU",
+           pt_size = 0.15, pt_alpha = 0.2) +
+plot_field(r2, sub,
+           title    = sprintf("11b. fd/SOR (%.1f s)", t2["elapsed"]),
+           subtitle = "N=512 | iterative",
+           palette  = "plasma", pt_size = 0.15, pt_alpha = 0.2) +
+plot_field(r3, sub,
+           title    = sprintf("11c. green/FFT (%.1f s)", t3["elapsed"]),
+           subtitle = "N=512 | FFT",
+           palette  = "viridis", pt_size = 0.15, pt_alpha = 0.2) +
+plot_annotation(
+  title    = "Section 11: 100k Transcripts — Solver Comparison",
+  subtitle = "2,000 of 100,000 points shown",
+  theme    = theme(plot.title    = element_text(face = "bold", size = 13),
+                   plot.subtitle = element_text(size = 9, color = "grey40")))
+

+
+

Solver selection guide

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + +
SituationRecommended solver
N ≤ 400, holey mask, or exact BCs neededfd, fd_solver = "direct"
N > 400, high-res output neededfd, fd_solver = "iterative" (SOR)
Fast sweep over L values, no BCs"green"
Quick visual check"kde"
+
+
+
+
+

Section 12 — Quantitative field extraction

+

Beyond visualisation, the field output can be queried numerically. +Here we demonstrate the main extraction operations.

+
res_q <- res_fd   # reuse the Gene A result from Section 2
+
+# ---- Bilinear interpolation at arbitrary query points ----
+interpolate_field <- function(result, query_pts) {
+  xg <- result$x; yg <- result$y; f <- result$field
+  vapply(seq_len(nrow(query_pts)), function(i) {
+    qx <- query_pts$x[i]; qy <- query_pts$y[i]
+    xi <- findInterval(qx, xg); yi <- findInterval(qy, yg)
+    if (xi < 1 || xi >= length(xg) || yi < 1 || yi >= length(yg))
+      return(NA_real_)
+    wx <- (qx - xg[xi]) / (xg[xi+1] - xg[xi])
+    wy <- (qy - yg[yi]) / (yg[yi+1] - yg[yi])
+    (1-wx)*(1-wy)*f[xi,yi]   + wx*(1-wy)*f[xi+1,yi] +
+    (1-wx)*wy   *f[xi,yi+1] + wx*wy    *f[xi+1,yi+1]
+  }, numeric(1))
+}
+
queries <- data.frame(
+  label = c("Cluster core", "Cluster edge", "Tissue centre",
+             "Far right", "Near notch"),
+  x = c(-4.5, -3.0, 0.0,  5.0, 3.0),
+  y = c( 3.8,  2.5, 0.0,  0.0, 0.0))
+queries$concentration <- interpolate_field(res_q, queries)
+queries
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
labelxyconcentration
Cluster core-4.53.846.4145453
Cluster edge-3.02.522.7933802
Tissue centre0.00.02.1617289
Far right5.00.0NA
Near notch3.00.00.7897652
+
+
df_q <- field_to_df(res_q)
+
+# 12b: Mean concentration by tissue half
+cat(sprintf("Mean concentration:\n  Left half  (x < 0): %.5f\n  Right half (x ≥ 0): %.5f\n",
+    mean(df_q$field[df_q$x <  0 & !is.na(df_q$field)]),
+    mean(df_q$field[df_q$x >= 0 & !is.na(df_q$field)])))
+
## Mean concentration:
+##   Left half  (x < 0): 6.15321
+##   Right half (x ≥ 0): 0.53608
+
# 12c: Peak location
+pk <- which.max(res_q$field)
+N  <- length(res_q$x)
+cat(sprintf("\nPeak: x=%.2f, y=%.2f, value=%.6f\n",
+    res_q$x[((pk-1L) %% N) + 1L],
+    res_q$y[((pk-1L) %/% N) + 1L],
+    res_q$field[pk]))
+
## 
+## Peak: x=-4.27, y=3.88, value=47.085967
+
# 12d: Field integral
+total  <- sum(res_q$field[res_q$mask], na.rm = TRUE) * res_q$hx * res_q$hy
+theory <- nrow(tc_A) * 1.0 / 0.3   # n * production_rate / lambda
+cat(sprintf("\nField integral: %.4f\nTheory (n·r/λ): %.4f\nAgreement: %.1f%%\n",
+    total, theory, 100 * (1 - abs(total - theory) / theory)))
+
## 
+## Field integral: 1103.1321
+## Theory (n·r/λ): 1333.3333
+## Agreement: 82.7%
+
prof <- df_q[abs(df_q$y) < res_q$hy * 1.5 & !is.na(df_q$field), ]
+ggplot(prof[order(prof$x), ], aes(x = x, y = field)) +
+  geom_line(color = "#e74c3c", linewidth = 0.8) +
+  geom_area(fill = "#e74c3c", alpha = 0.15) +
+  geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
+  labs(title    = "12e. Concentration profile along y ≈ 0",
+       subtitle = "Cluster peak at x ≈ −4.5; near-zero at right edge",
+       x = "X", y = "Concentration") +
+  theme_demo()
+

+
+
+
+

Parameter quick-reference

+
estimate_concentration_field() — parameter guide
+─────────────────────────────────────────────────────────────────────
+
+PHYSICS
+  diffusion_length  Primary tuning knob. Set directly: L = 0.5 to 10
+                    Rule: L ~ 10–20% of inter-cluster distance
+  D                 Affects amplitude only (amplitude ∝ 1/D)
+  lambda            Affects amplitude only; lambda = D / L^2
+  production_rate   Global amplitude scaling; use weight_col for per-cell
+
+SOLVER
+  method = "fd"     Default. Use whenever you need accurate BCs or holes.
+  method = "green"  Faster for sweeps; ignores domain shape.
+  method = "kde"    Quick baseline; no physics.
+
+  fd_solver = "direct"     N ≤ 400  (sparse LU decomposition)
+  fd_solver = "iterative"  N > 400  (red-black SOR)
+
+  boundary_condition = "dirichlet"  C=0 at boundary. Default. Tissue sections.
+  boundary_condition = "neumann"    dC/dn=0. Sealed system.
+
+  grid_resolution = 256   Explore. 512 for publication quality.
+
+WEIGHTS
+  weight_col = "umi"   Use UMI counts as source strength.
+                        Careful: amplifies outliers.
+
+POST-PROCESSING
+  log_transform = TRUE    Apply log1p() after solving. Useful for wide dynamic range.
+  normalize = TRUE        Rescale to [0,1] for comparison across conditions.
+  clip_negative = TRUE    Remove small numerical negatives. Default on.
+─────────────────────────────────────────────────────────────────────
+
+
+
+

Session information

+
sessionInfo()
+
## R version 4.5.2 (2025-10-31)
+## Platform: aarch64-apple-darwin20
+## Running under: macOS Sonoma 14.6.1
+## 
+## Matrix products: default
+## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
+## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
+## 
+## locale:
+## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
+## 
+## time zone: America/New_York
+## tzcode source: internal
+## 
+## attached base packages:
+## [1] stats     graphics  grDevices utils     datasets  methods   base     
+## 
+## other attached packages:
+## [1] patchwork_1.3.2 ggplot2_4.0.1   sf_1.0-21      
+## 
+## loaded via a namespace (and not attached):
+##  [1] sass_0.4.10        generics_0.1.4     class_7.3-23       KernSmooth_2.23-26
+##  [5] lattice_0.22-7     digest_0.6.39      magrittr_2.0.4     evaluate_1.0.5    
+##  [9] grid_4.5.2         RColorBrewer_1.1-3 fastmap_1.2.0      Matrix_1.7-4      
+## [13] jsonlite_2.0.0     e1071_1.7-16       DBI_1.2.3          viridisLite_0.4.2 
+## [17] scales_1.4.0       codetools_0.2-20   isoband_0.3.0      jquerylib_0.1.4   
+## [21] cli_3.6.5          rlang_1.1.7        units_0.8-7        withr_3.0.2       
+## [25] cachem_1.1.0       yaml_2.3.12        otel_0.2.0         tools_4.5.2       
+## [29] concaveman_1.2.0   dplyr_1.1.4        vctrs_0.7.1        R6_2.6.1          
+## [33] proxy_0.4-27       lifecycle_1.0.5    classInt_0.4-11    MASS_7.3-65       
+## [37] pkgconfig_2.0.3    pillar_1.11.1      bslib_0.10.0       gtable_0.3.6      
+## [41] glue_1.8.0         Rcpp_1.1.1         xfun_0.56          tibble_3.3.1      
+## [45] tidyselect_1.2.1   rstudioapi_0.17.1  knitr_1.51         dichromat_2.0-0.1 
+## [49] farver_2.1.2       htmltools_0.5.9    rmarkdown_2.30     labeling_0.4.3    
+## [53] compiler_4.5.2     S7_0.2.1
+
+ + + +
+
+ +
+ + + + + + + + + + + + + + + + + diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec1-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec1-plot-1.png new file mode 100644 index 0000000..0408397 Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec1-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec10-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec10-plot-1.png new file mode 100644 index 0000000..4d75c93 Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec10-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec11-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec11-plot-1.png new file mode 100644 index 0000000..0f0e21e Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec11-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec12-profile-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec12-profile-1.png new file mode 100644 index 0000000..b52cce3 Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec12-profile-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec2-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec2-plot-1.png new file mode 100644 index 0000000..d7799ec Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec2-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec3-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec3-plot-1.png new file mode 100644 index 0000000..15407c8 Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec3-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec4-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec4-plot-1.png new file mode 100644 index 0000000..0e4fcfe Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec4-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec5-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec5-plot-1.png new file mode 100644 index 0000000..f9ddb61 Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec5-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec6-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec6-plot-1.png new file mode 100644 index 0000000..f71ca08 Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec6-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec7-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec7-plot-1.png new file mode 100644 index 0000000..065b7af Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec7-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec8-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec8-plot-1.png new file mode 100644 index 0000000..9428ce0 Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec8-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec9-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec9-plot-1.png new file mode 100644 index 0000000..9c1083a Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec9-plot-1.png differ diff --git a/tests/testthat/test-estimate_concentration_field.R b/tests/testthat/test-estimate_concentration_field.R new file mode 100644 index 0000000..c22d74f --- /dev/null +++ b/tests/testthat/test-estimate_concentration_field.R @@ -0,0 +1,296 @@ +## Tests for estimate_concentration_field() +## Uses a simple square polygon so TissueMask is not required. + +library(sf) + +# ---- shared fixtures ------------------------------------------------------- + +sq <- st_polygon(list(cbind(c(0, 10, 10, 0, 0), c(0, 0, 10, 10, 0)))) +mask <- st_sfc(sq) + +set.seed(42) +tc <- data.frame(x = runif(80, 1, 9), y = runif(80, 1, 9)) + +# run once for re-use +res_default <- estimate_concentration_field( + mask, tc, D = 1, lambda = 0.2, + grid_resolution = 64L, verbose = FALSE +) + + +# ---- return structure ------------------------------------------------------ + +test_that("returns a list with required elements", { + expect_type(res_default, "list") + expect_named(res_default, c("field", "x", "y", "hx", "hy", + "mask", "sources", "params", "diagnostics")) +}) + +test_that("field matrix has correct dimensions", { + expect_equal(dim(res_default$field), c(64L, 64L)) +}) + +test_that("x and y vectors have length N", { + expect_length(res_default$x, 64L) + expect_length(res_default$y, 64L) +}) + +test_that("mask matrix is logical with correct dimensions", { + expect_type(res_default$mask, "logical") + expect_equal(dim(res_default$mask), c(64L, 64L)) +}) + +test_that("sources matrix is returned by default", { + expect_equal(dim(res_default$sources), c(64L, 64L)) +}) + +test_that("sources is NULL when return_sources = FALSE", { + res <- estimate_concentration_field( + mask, tc, grid_resolution = 32L, verbose = FALSE, + return_sources = FALSE + ) + expect_null(res$sources) +}) + + +# ---- field values ---------------------------------------------------------- + +test_that("field is NA outside mask", { + outside <- !res_default$mask + expect_true(all(is.na(res_default$field[outside]))) +}) + +test_that("field is non-negative inside mask (clip_negative = TRUE)", { + inside_vals <- res_default$field[res_default$mask] + expect_true(all(inside_vals >= 0, na.rm = TRUE)) +}) + +test_that("field has positive peak concentration", { + expect_gt(max(res_default$field, na.rm = TRUE), 0) +}) + + +# ---- solvers --------------------------------------------------------------- + +test_that("method = 'fd' direct solver works", { + res <- estimate_concentration_field( + mask, tc, method = "fd", fd_solver = "direct", + grid_resolution = 32L, verbose = FALSE + ) + expect_false(all(is.na(res$field))) +}) + +test_that("method = 'fd' iterative (SOR) solver works", { + res <- estimate_concentration_field( + mask, tc, method = "fd", fd_solver = "iterative", + D = 1, lambda = 0.2, grid_resolution = 32L, verbose = FALSE + ) + expect_false(all(is.na(res$field))) +}) + +test_that("method = 'green' works", { + res <- estimate_concentration_field( + mask, tc, method = "green", D = 1, lambda = 0.2, + grid_resolution = 32L, verbose = FALSE + ) + expect_false(all(is.na(res$field))) +}) + +test_that("method = 'kde' works", { + res <- estimate_concentration_field( + mask, tc, method = "kde", kde_bandwidth = 2, + grid_resolution = 32L, verbose = FALSE + ) + expect_false(all(is.na(res$field))) +}) + + +# ---- diffusion_length override -------------------------------------------- + +test_that("diffusion_length override changes peak concentration", { + res_sm <- estimate_concentration_field( + mask, tc, diffusion_length = 0.5, + grid_resolution = 32L, verbose = FALSE + ) + res_lg <- estimate_concentration_field( + mask, tc, diffusion_length = 8.0, + grid_resolution = 32L, verbose = FALSE + ) + pk_sm <- max(res_sm$field, na.rm = TRUE) + pk_lg <- max(res_lg$field, na.rm = TRUE) + # Smaller L -> tighter, higher peak + expect_gt(pk_sm, pk_lg) +}) + +test_that("params$diffusion_length matches supplied value", { + res <- estimate_concentration_field( + mask, tc, diffusion_length = 3.5, + grid_resolution = 32L, verbose = FALSE + ) + expect_equal(res$params$diffusion_length, 3.5) +}) + + +# ---- boundary conditions --------------------------------------------------- + +test_that("Neumann BC produces higher peak than Dirichlet at same params", { + rd <- estimate_concentration_field( + mask, tc, method = "fd", fd_solver = "direct", + boundary_condition = "dirichlet", D = 1, lambda = 0.2, + grid_resolution = 32L, verbose = FALSE + ) + rn <- estimate_concentration_field( + mask, tc, method = "fd", fd_solver = "direct", + boundary_condition = "neumann", D = 1, lambda = 0.2, + grid_resolution = 32L, verbose = FALSE + ) + expect_gt(max(rn$field, na.rm = TRUE), max(rd$field, na.rm = TRUE)) +}) + + +# ---- post-processing ------------------------------------------------------- + +test_that("normalize = TRUE gives field in [0, 1]", { + res <- estimate_concentration_field( + mask, tc, normalize = TRUE, grid_resolution = 32L, verbose = FALSE + ) + inside <- res$field[res$mask] + expect_lte(max(inside), 1 + 1e-10) + expect_gte(min(inside), 0 - 1e-10) +}) + +test_that("log_transform = TRUE gives non-negative transformed field", { + res <- estimate_concentration_field( + mask, tc, log_transform = TRUE, grid_resolution = 32L, verbose = FALSE + ) + inside <- res$field[res$mask] + expect_true(all(inside >= 0)) +}) + + +# ---- weighting and external transcripts ----------------------------------- + +test_that("weight_col scales field proportionally", { + tc2 <- tc + tc2$umi <- 1 + tc2$umi[1:20] <- 10 + res_w <- estimate_concentration_field( + mask, tc2, weight_col = "umi", production_rate = 1, + grid_resolution = 32L, verbose = FALSE + ) + # Simple unweighted version + res_u <- estimate_concentration_field( + mask, tc2, grid_resolution = 32L, verbose = FALSE + ) + # Weighted peak should be higher (heavy transcripts have more influence) + expect_gt(max(res_w$field, na.rm = TRUE), max(res_u$field, na.rm = TRUE)) +}) + +test_that("include_external = TRUE does not error", { + tc_ext <- data.frame(x = c(tc$x, 15, 16), y = c(tc$y, 15, 16)) + expect_no_error( + estimate_concentration_field( + mask, tc_ext, include_external = TRUE, + grid_resolution = 32L, verbose = FALSE + ) + ) +}) + + +# ---- matrix input ---------------------------------------------------------- + +test_that("matrix input for transcript_coords is accepted", { + tc_mat <- as.matrix(tc[, c("x", "y")]) + expect_no_error( + estimate_concentration_field( + mask, tc_mat, grid_resolution = 32L, verbose = FALSE + ) + ) +}) + + +# ---- NA rows in input ------------------------------------------------------ + +test_that("NA rows in transcript_coords are silently dropped", { + tc_na <- tc + tc_na[5, "x"] <- NA + expect_no_error( + estimate_concentration_field( + mask, tc_na, grid_resolution = 32L, verbose = FALSE + ) + ) +}) + + +# ---- verbose = FALSE ------------------------------------------------------- + +test_that("verbose = FALSE produces no messages", { + expect_no_message( + suppressWarnings( + estimate_concentration_field( + mask, tc, grid_resolution = 32L, verbose = FALSE + ) + ) + ) +}) + + +# ---- error cases ----------------------------------------------------------- + +test_that("no transcripts in mask raises error", { + tc_out <- data.frame(x = c(20, 21), y = c(20, 21)) + expect_error( + estimate_concentration_field(mask, tc_out, verbose = FALSE), + "No transcripts available" + ) +}) + +test_that("method = 'green' with lambda = 0 raises error", { + expect_error( + estimate_concentration_field( + mask, tc, method = "green", lambda = 0, + grid_resolution = 32L, verbose = FALSE + ), + "lambda" + ) +}) + +test_that("unknown method raises informative error", { + expect_error( + estimate_concentration_field( + mask, tc, method = "banana", + grid_resolution = 32L, verbose = FALSE + ), + "Unknown method" + ) +}) + + +# ---- utility functions ----------------------------------------------------- + +test_that("field_to_df returns data.frame with required columns", { + df <- field_to_df(res_default) + expect_s3_class(df, "data.frame") + expect_true(all(c("x", "y", "field", "inside") %in% names(df))) +}) + +test_that("field_to_df inside_only = FALSE includes NA-field rows", { + df_all <- field_to_df(res_default, inside_only = FALSE) + df_in <- field_to_df(res_default, inside_only = TRUE) + expect_gt(nrow(df_all), nrow(df_in)) +}) + +test_that("sweep_diffusion_length returns data.frame with L column", { + sw <- sweep_diffusion_length( + c(1, 3), mask, tc, grid_resolution = 32L + ) + expect_s3_class(sw, "data.frame") + expect_true("L" %in% names(sw)) + expect_equal(sort(unique(sw$L)), c(1, 3)) +}) + +test_that("plot_concentration_field returns ggplot when ggplot2 available", { + skip_if_not_installed("ggplot2") + p <- plot_concentration_field(res_default) + expect_s3_class(p, "ggplot") +}) diff --git a/vignettes/complete-reference.Rmd b/vignettes/complete-reference.Rmd new file mode 100644 index 0000000..86f1a18 --- /dev/null +++ b/vignettes/complete-reference.Rmd @@ -0,0 +1,857 @@ +--- +title: "Complete Reference: estimate_concentration_field()" +subtitle: "Physics, solvers, and practical tuning" +author: "Raredon Laboratory" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Complete Reference: estimate_concentration_field()} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + echo = TRUE, + warning = FALSE, + message = FALSE, + fig.align = "center", + fig.width = 9, + fig.height = 5 +) +``` + +--- + +# Overview + +Spatial transcriptomics data gives us discrete transcript or cell locations, but many biological +processes — ligand gradients, cytokine fields, metabolite concentrations — are continuous. +`estimate_concentration_field()` bridges this gap by computing the **steady-state concentration +field** that a set of point sources (mRNA transcripts, detected proteins) would produce under a +physically motivated diffusion-clearance model. + +The result is a dense concentration map over the tissue, interpolated from point source locations. +This can be used to: + +- Identify where a signalling molecule is most concentrated +- Compute the concentration seen by each cell +- Create two-gene ratio maps (spatial domains of ligand/receptor balance) +- Visualise how diffusion length affects signal range + +--- + +# The physical model + +The function solves the **screened Poisson equation** (steady-state diffusion-clearance PDE): + +$$D \nabla^2 C(\mathbf{x}) - \lambda C(\mathbf{x}) + s(\mathbf{x}) = 0$$ + +| Symbol | Meaning | Parameter | +|---|---|---| +| $C(\mathbf{x})$ | Steady-state concentration at position **x** | (what we solve for) | +| $D$ | Diffusion coefficient (spatial spread rate) | `D` | +| $\lambda$ | First-order clearance rate | `lambda` | +| $s(\mathbf{x})$ | Source density (production rate per unit area) | `production_rate` | + +The key derived quantity is the **diffusion length**: + +$$L = \sqrt{D / \lambda}$$ + +$L$ sets the characteristic distance over which concentration decays from a point source. It is the +single most important parameter to tune. The shape of the field depends only on $L$; the amplitude +scales as $1/D$. + +> **Practical interpretation:** $L$ is roughly the radius of influence of a single transcript. At +> distance $r \gg L$ from a source, the concentration is negligible. At $r \ll L$, concentration is +> near its maximum. + +--- + +# Setup + +```{r libraries} +library(TissueField) +library(sf) +library(ggplot2) +library(patchwork) +set.seed(2024) +``` + +--- + +# Shared helpers + +```{r helpers} +# ---- Plotting theme ---- +theme_demo <- function(bs = 9.5) + theme_minimal(base_size = bs) + + theme(plot.title = element_text(face = "bold", size = bs + 0.5), + plot.subtitle = element_text(size = bs - 1, color = "grey40", + margin = margin(b = 4)), + panel.grid = element_line(color = "grey94"), + legend.key.width = unit(0.35, "cm"), + legend.title = element_text(size = bs - 1), + legend.text = element_text(size = bs - 1.5), + axis.text = element_text(size = bs - 2)) + +# ---- Field visualiser ---- +plot_field <- function(result, transcripts = NULL, + title = "", subtitle = "", + palette = "magma", log_scale = FALSE, + show_pts = TRUE, show_contours = TRUE, n_contours = 6L, + pt_size = 0.35, pt_alpha = 0.45, pt_color = "#00e5ff", + fill_label = "Conc.", symmetric = FALSE) { + df <- field_to_df(result, inside_only = TRUE) + fill_col <- if (log_scale) { df$fv <- log1p(df$field); "fv" } else "field" + fill_lbl <- if (log_scale) paste0("log1p(", fill_label, ")") else fill_label + p <- ggplot(df, aes(x = x, y = y, fill = .data[[fill_col]])) + + geom_raster(interpolate = TRUE) + coord_equal() + + labs(title = title, subtitle = subtitle, x = "X", y = "Y") + + theme_demo() + if (symmetric) { + lim <- max(abs(df[[fill_col]]), na.rm = TRUE) + p <- p + scale_fill_distiller(palette = "RdBu", limits = c(-lim, lim), + name = fill_lbl, na.value = "transparent") + } else { + p <- p + scale_fill_viridis_c(option = palette, name = fill_lbl, + na.value = "transparent") + } + if (show_contours && any(!is.na(df$field))) + p <- p + geom_contour(aes(z = field), color = "white", + alpha = 0.35, linewidth = 0.25, bins = n_contours) + if (show_pts && !is.null(transcripts)) + p <- p + geom_point(data = transcripts, aes(x = x, y = y), + inherit.aes = FALSE, color = pt_color, + size = pt_size, alpha = pt_alpha) + p +} + +# ---- Sample points uniformly from inside a mask ---- +sample_in_mask <- function(mask_poly, n, max_tries = 20) { + bb <- sf::st_bbox(mask_poly); mu <- sf::st_union(mask_poly) + pts <- data.frame(x = numeric(0), y = numeric(0)) + for (i in seq_len(max_tries)) { + if (nrow(pts) >= n) break + cnd <- data.frame(x = runif(n * 8, bb["xmin"], bb["xmax"]), + y = runif(n * 8, bb["ymin"], bb["ymax"])) + ok <- as.logical(sf::st_within( + sf::st_as_sf(cnd, coords = c("x","y")), mu, sparse = FALSE)[, 1L]) + pts <- rbind(pts, cnd[ok, ]) + } + pts[seq_len(min(n, nrow(pts))), ] +} +``` + +--- + +# Function reference + +```r +estimate_concentration_field( + mask, # sfc polygon -- output of fit_spatial_mask() + transcript_coords, # data.frame with columns x and y + + # Physics + D = 1.0, # diffusion coefficient (arbitrary units) + lambda = 0.1, # first-order clearance rate + diffusion_length = NULL, # override: set L directly + production_rate = 1.0, + + # Solver + method = "fd", # "fd" | "green" | "kde" + grid_resolution = 256L, # N x N grid + boundary_condition = "dirichlet", # "dirichlet" | "neumann" (fd only) + fd_solver = "direct", # "direct" | "iterative" (fd only) + + # Weighting + weight_col = NULL, # column name for per-transcript weights (e.g. "umi") + + # Optional + include_external = FALSE, + normalize = FALSE, + log_transform = FALSE, + clip_negative = TRUE, + n_cores = 1L, + plot = FALSE, + verbose = TRUE +) +``` + +## Return value + +| Element | Contents | +|---|---| +| `field` | N x N matrix; `NA` outside the mask | +| `x`, `y` | Grid centre coordinates (length N each) | +| `hx`, `hy` | Grid cell width and height | +| `mask` | N x N logical matrix (TRUE = inside mask) | +| `sources` | N x N matrix of binned source density | +| `params` | List of all input parameters | +| `diagnostics` | Timing, counts, etc. | + +Use `field_to_df(result)` to convert to a tidy data frame for ggplot2. + +--- + +# Section 1 -- Synthetic tissue and transcript layout + +A kidney-bean-shaped tissue with two synthetic genes: + +- **Gene A**: focal cluster at upper-left, plus sparse background. High UMI count. +- **Gene B**: 500 transcripts with a right-biased spatial distribution. Low UMI count. + +```{r sec1-tissue} +in_kidney <- function(x, y, oa = 9, ob = 7, ncx = 5, nr = 3.8) + (x/oa)^2 + (y/ob)^2 < 1 & sqrt((x - ncx)^2 + y^2) >= nr + +cands <- data.frame(x = runif(18000, -11, 11), y = runif(18000, -9, 9)) +tissue_pts <- cands[in_kidney(cands$x, cands$y), ][1:3000, ] + +if (requireNamespace("TissueMask", quietly = TRUE)) { + tissue_mask <- TissueMask::fit_spatial_mask( + tissue_pts, method = "raster", + raster_resolution = 256L, raster_sigma = 0.7, raster_threshold = 0.15, + verbose = FALSE) +} else { + pts_sf <- st_as_sf(tissue_pts, coords = c("x","y")) + tissue_mask <- st_convex_hull(st_union(pts_sf)) +} +mu_t <- sf::st_union(tissue_mask) +``` + +```{r sec1-transcripts} +# Gene A: focal cluster at (-4.5, 3.8) + sparse background +tc_A_cl <- data.frame(x = rnorm(350, -4.5, 1.2), y = rnorm(350, 3.8, 0.9)) +A_in <- as.logical(sf::st_within( + sf::st_as_sf(tc_A_cl, coords = c("x","y")), mu_t, sparse = FALSE)[, 1L]) +tc_A_cl <- tc_A_cl[A_in, ] +tc_A_sc <- sample_in_mask(tissue_mask, 50) +tc_A <- rbind(tc_A_cl, tc_A_sc) +tc_A$umi <- c(rpois(nrow(tc_A_cl), 15), rpois(nrow(tc_A_sc), 2)) + +# Gene B: right-biased random distribution +tc_B_all <- sample_in_mask(tissue_mask, 2000) +prob_B <- plogis(tc_B_all$x / 3) +tc_B <- tc_B_all[runif(nrow(tc_B_all)) < prob_B, ][1:500, ] +tc_B$umi <- rpois(nrow(tc_B), 5) +``` + +```{r sec1-plot, fig.height=5} +tissue_sf <- sf::st_sf(geometry = tissue_mask) + +ggplot() + + geom_sf(data = tissue_sf, fill = "grey92", color = "grey60", linewidth = 0.4) + + geom_point(data = tc_A, aes(x = x, y = y, color = "Gene A", size = log1p(umi)), + alpha = 0.6) + + geom_point(data = tc_B, aes(x = x, y = y, color = "Gene B", size = log1p(umi)), + alpha = 0.5) + + scale_color_manual(values = c("Gene A" = "#e74c3c", "Gene B" = "#2980b9"), + name = NULL) + + scale_size_continuous(range = c(0.3, 2.5), name = "log(UMI+1)") + + coord_sf(datum = NA) + theme_demo() + + labs(title = "Section 1: Synthetic kidney-bean tissue", + subtitle = "Gene A: focal upper-left | Gene B: broad right-biased", + x = "X", y = "Y") +``` + +--- + +# Section 2 -- Method comparison: fd, green, kde + +```{r sec2} +pb <- list(D = 1, lambda = 0.3, production_rate = 1, + grid_resolution = 256L, verbose = FALSE) + +res_fd <- do.call(estimate_concentration_field, + c(list(mask = tissue_mask, transcript_coords = tc_A, + method = "fd", fd_solver = "direct", + boundary_condition = "dirichlet"), pb)) + +res_green <- do.call(estimate_concentration_field, + c(list(mask = tissue_mask, transcript_coords = tc_A, + method = "green"), pb)) + +res_kde <- do.call(estimate_concentration_field, + c(list(mask = tissue_mask, transcript_coords = tc_A, + method = "kde", kde_bandwidth = sqrt(1/0.3)), pb)) + +res_diff <- res_fd +res_diff$field <- res_fd$field - res_green$field +``` + +```{r sec2-plot, fig.height=8} +(plot_field(res_fd, tc_A, title = "2a. fd (Dirichlet)", + subtitle = "Exact BCs; recommended") + + plot_field(res_green, tc_A, title = "2b. green (FFT)", + subtitle = "Infinite domain; no boundary effects")) / +(plot_field(res_kde, tc_A, title = "2c. kde", + subtitle = "Bandwidth = L; phenomenological") + + plot_field(res_diff, title = "2d. fd - green", + subtitle = "fd correctly suppresses near-edge concentration", + symmetric = TRUE, show_contours = FALSE, show_pts = FALSE)) + +plot_annotation( + title = "Section 2: Solver Comparison (Gene A, D=1, lambda=0.3, L ~ 1.83)", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +**Key observations:** + +- `fd` and `green` agree well in the interior; difference map (2d) shows `fd` correctly zeros + concentration at the tissue boundary (Dirichlet), while `green` leaks outward. +- `kde` is visually similar because the bandwidth was set to $L$, but it has no physical + interpretation and no boundary conditions. + +--- + +# Section 3 -- Diffusion length sweep + +The diffusion length $L = \sqrt{D/\lambda}$ is the primary tuning parameter. + +```{r sec3} +L_vals <- c(0.5, 1.5, 4.0, 10.0) + +sw3 <- lapply(L_vals, function(Lv) { + res <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, diffusion_length = Lv, production_rate = 1, + method = "fd", fd_solver = "direct", grid_resolution = 256L, verbose = FALSE) + f <- res$field + res$field <- (f - min(f, na.rm = TRUE)) / diff(range(f, na.rm = TRUE)) + plot_field(res, tc_A, + title = sprintf("L = %.1f", Lv), + subtitle = sprintf("lambda ~ %.3f", 1/Lv^2), + fill_label = "Norm. Conc.", pt_size = 0.4) + + scale_fill_viridis_c(option = "magma", name = "Norm.\nConc.", + limits = c(0, 1), na.value = "transparent") +}) +``` + +```{r sec3-plot, fig.height=4} +wrap_plots(sw3, nrow = 1) + + plot_annotation( + title = "Section 3: Diffusion Length Sweep (each panel normalised independently)", + subtitle = "Small L: tight peaks | Large L: tissue-wide field", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +```{r sec3-peaks} +peaks <- sapply(L_vals, function(Lv) { + res <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, diffusion_length = Lv, + method = "fd", fd_solver = "direct", grid_resolution = 256L, verbose = FALSE) + max(res$field, na.rm = TRUE) +}) +knitr::kable(data.frame(L = L_vals, peak_concentration = round(peaks, 5))) +``` + +### How to choose L in practice + +1. **What is the typical inter-cluster distance?** $L$ should be 10--20% of this distance. +2. **Are you modelling a small diffusing molecule or a large one?** Small molecules have higher $D$ and thus larger $L$. +3. **Does your field look biologically reasonable?** Inspect the output and compare to known biology. + +--- + +# Section 4 -- Fixed L, varying D and lambda + +**Field shape depends only on $L$.** Changing $D$ and $\lambda$ while keeping $L = \sqrt{D/\lambda}$ constant produces identical spatial patterns, scaling amplitude as $1/D$. + +```{r sec4} +DL_cases <- list( + list(D = 0.1, lam = 0.025, label = "D=0.1, lam=0.025"), + list(D = 1, lam = 0.25, label = "D=1, lam=0.25"), + list(D = 5, lam = 1.25, label = "D=5, lam=1.25"), + list(D = 20, lam = 5, label = "D=20, lam=5") +) + +dl_pl <- lapply(DL_cases, function(co) { + res <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = co$D, lambda = co$lam, production_rate = 1, + method = "fd", fd_solver = "direct", grid_resolution = 256L, verbose = FALSE) + pk <- round(max(res$field, na.rm = TRUE), 5) + plot_field(res, tc_A, + title = co$label, + subtitle = sprintf("L=2 fixed | peak=%.5f", pk), + pt_size = 0.3) +}) +``` + +```{r sec4-plot, fig.height=4} +wrap_plots(dl_pl, nrow = 1) + + plot_annotation( + title = "Section 4: Fixed L=2, Varying D and lambda", + subtitle = "Shape IDENTICAL (same L). Amplitude proportional to 1/D.", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +--- + +# Section 5 -- Boundary conditions + +```{r sec5} +res_dir <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, lambda = 0.2, method = "fd", fd_solver = "direct", + boundary_condition = "dirichlet", grid_resolution = 256L, verbose = FALSE) + +res_neu <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, lambda = 0.2, method = "fd", fd_solver = "direct", + boundary_condition = "neumann", grid_resolution = 256L, verbose = FALSE) + +res_bcd <- res_dir +res_bcd$field <- res_neu$field - res_dir$field +``` + +```{r sec5-crosssection} +sl <- function(res, lbl) { + df <- field_to_df(res) + s <- df[abs(df$y) < res$hy * 1.5 & !is.na(df$field), ] + s <- s[order(s$x), ]; s$BC <- lbl; s +} +sl_all <- rbind(sl(res_dir, "Dirichlet"), sl(res_neu, "Neumann")) + +p5d <- ggplot(sl_all, aes(x = x, y = field, color = BC)) + + geom_line(linewidth = 0.8) + + scale_color_manual(values = c("Dirichlet" = "#e74c3c", "Neumann" = "#2980b9")) + + labs(title = "5d. Cross-section at y = 0", + subtitle = "Dirichlet drops to 0; Neumann stays elevated", + x = "X", y = "Conc.", color = "BC") + + theme_demo() +``` + +```{r sec5-plot, fig.height=8} +(plot_field(res_dir, tc_A, + title = "5a. Dirichlet (C=0 at boundary)", + subtitle = "D=1, lambda=0.2, L ~ 2.24") + + plot_field(res_neu, tc_A, + title = "5b. Neumann (dC/dn=0)", + subtitle = "Molecules reflected; higher near edges", + palette = "plasma")) / +(plot_field(res_bcd, + title = "5c. Neumann - Dirichlet", + subtitle = "Neumann retains more concentration near boundary", + symmetric = TRUE, show_contours = FALSE, show_pts = FALSE) + + p5d) + +plot_annotation( + title = "Section 5: Boundary Condition Comparison", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +**Recommendation:** Use `"dirichlet"` for tissue sections. Neumann BCs are mainly useful when comparing to analytic solutions or modelling sealed compartments. + +--- + +# Section 6 -- UMI weighting vs. equal weighting + +```{r sec6} +res_uw <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, lambda = 0.3, method = "fd", fd_solver = "direct", + grid_resolution = 256L, weight_col = NULL, verbose = FALSE) + +res_wt <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, lambda = 0.3, method = "fd", fd_solver = "direct", + grid_resolution = 256L, weight_col = "umi", verbose = FALSE) + +nrm <- function(r) { + f <- r$field + r$field <- (f - min(f, na.rm = TRUE)) / diff(range(f, na.rm = TRUE)) + r +} +``` + +```{r sec6-plot, fig.height=4} +plot_field(nrm(res_uw), tc_A, + title = "6a. Unweighted", + subtitle = "All transcripts equal weight", + fill_label = "Norm. Conc.") + +plot_field(nrm(res_wt), tc_A, + title = "6b. UMI-weighted", + subtitle = "High-UMI cluster source amplified", + palette = "plasma", + fill_label = "Norm. Conc.") + +plot_annotation( + title = "Section 6: UMI Weighting", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +--- + +# Section 7 -- External transcripts + +`include_external = TRUE` includes transcripts outside the mask as sources, useful when transcripts +fall just outside the fitted mask boundary. + +```{r sec7} +tc_ext <- data.frame(x = rnorm(80, 6.5, 0.4), + y = rnorm(80, 0, 0.5), + umi = rpois(80, 10)) +ext_in <- as.logical(sf::st_within( + sf::st_as_sf(tc_ext, coords = c("x","y")), mu_t, sparse = FALSE)[, 1L]) +tc_ext <- tc_ext[!ext_in, ] +tc_Ax <- rbind(tc_A, tc_ext) + +res_ne <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_Ax, + D = 1, lambda = 0.15, method = "fd", fd_solver = "direct", + grid_resolution = 256L, include_external = FALSE, verbose = FALSE) + +res_we <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_Ax, + D = 1, lambda = 0.15, method = "fd", fd_solver = "direct", + grid_resolution = 256L, include_external = TRUE, verbose = FALSE) + +ext_pt_layer <- geom_point(data = tc_ext, aes(x = x, y = y), + color = "orange", size = 0.9, alpha = 0.8, + inherit.aes = FALSE) +``` + +```{r sec7-plot, fig.height=4} +(plot_field(res_ne, tc_A, title = "7a. include_external = FALSE") + + ext_pt_layer) + +(plot_field(res_we, tc_A, title = "7b. include_external = TRUE", + palette = "plasma") + + ext_pt_layer) + +plot_annotation( + title = "Section 7: External Transcripts (orange = outside mask)", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +--- + +# Section 8 -- Holey mask: vascular clearance + +A key strength of `method = "fd"` is correct handling of holes (vessel lumens) in the mask. The `green` method ignores topology. + +```{r sec8-mask} +vd <- list(list(cx = -2, cy = 1.5, r = 1.2), + list(cx = 2.5, cy = -2, r = 1.0), + list(cx = -4.5, cy = -2.5, r = 0.8)) +in_v <- function(x, y) Reduce(`|`, lapply(vd, function(v) + sqrt((x - v$cx)^2 + (y - v$cy)^2) < v$r)) + +n_h <- 3000 +cnd_h <- data.frame(x = runif(n_h * 8, -11, 11), + y = runif(n_h * 8, -9, 9)) +kp_h <- in_kidney(cnd_h$x, cnd_h$y) & !in_v(cnd_h$x, cnd_h$y) +pts_h <- cnd_h[kp_h, ][1:n_h, ] + +if (requireNamespace("TissueMask", quietly = TRUE)) { + holey_mask <- TissueMask::fit_spatial_mask( + pts_h, method = "raster", + raster_resolution = 256L, raster_sigma = 0.4, + raster_threshold = 0.2, verbose = FALSE) +} else { + pts_hf <- st_as_sf(pts_h, coords = c("x","y")) + holey_mask <- st_convex_hull(st_union(pts_hf)) +} +tc_h <- sample_in_mask(holey_mask, 400) +``` + +```{r sec8-solve} +res_hfd <- estimate_concentration_field( + mask = holey_mask, transcript_coords = tc_h, + D = 1, lambda = 0.15, method = "fd", fd_solver = "direct", + grid_resolution = 256L, verbose = FALSE) + +res_htight <- estimate_concentration_field( + mask = holey_mask, transcript_coords = tc_h, + D = 1, lambda = 1.0, method = "fd", fd_solver = "direct", + grid_resolution = 256L, verbose = FALSE) + +res_hgrn <- estimate_concentration_field( + mask = holey_mask, transcript_coords = tc_h, + D = 1, lambda = 0.15, method = "green", + grid_resolution = 256L, verbose = FALSE) + +res_hdiff <- res_hfd +res_hdiff$field <- res_hfd$field - res_hgrn$field +``` + +```{r sec8-plot, fig.height=8} +(plot_field(res_hfd, tc_h, + title = "8a. fd | L ~ 2.58", + subtitle = "Vessel lumens correctly zeroed", + pt_size = 0.3) + + plot_field(res_htight, tc_h, + title = "8b. fd | L = 1 (tight)", + subtitle = "Steeper depletion near vessels", + palette = "inferno", pt_size = 0.3)) / +(plot_field(res_hgrn, tc_h, + title = "8c. green -- topology blind", + subtitle = "FFT ignores holes entirely", + palette = "plasma", pt_size = 0.3) + + plot_field(res_hdiff, + title = "8d. fd - green", + subtitle = "fd zeros lumens; green does not", + symmetric = TRUE, show_contours = FALSE, show_pts = FALSE)) + +plot_annotation( + title = "Section 8: Holey Mask -- Vascular Clearance", + subtitle = "Use method='fd' whenever the mask has holes", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +> **Important:** If your tissue mask has holes (vessel lumens, necrotic cores), always use +> `method = "fd"`. The `green` and `kde` methods are unaware of domain topology. + +--- + +# Section 9 -- Wide dynamic range and log-scale visualisation + +When transcript counts span several orders of magnitude, use `log_scale = TRUE` in `plot_field()` +or `log_transform = TRUE` in the solver call to reveal background structure. + +```{r sec9} +tc_w <- rbind( + data.frame(x = rnorm(20, -5, 0.3), y = rnorm(20, 4, 0.3), umi = rpois(20, 200)), + data.frame(x = rnorm(300, 0, 4), y = rnorm(300, 0, 3), umi = rpois(300, 1))) +tc_w_in <- as.logical(sf::st_within( + sf::st_as_sf(tc_w[, c("x","y")], coords = c("x","y")), + mu_t, sparse = FALSE)[, 1L]) +tc_w <- tc_w[tc_w_in, ] + +res_w <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_w, + D = 1, lambda = 0.5, method = "fd", fd_solver = "direct", + grid_resolution = 256L, weight_col = "umi", verbose = FALSE) +``` + +```{r sec9-plot, fig.height=4} +plot_field(res_w, tc_w, + title = "9a. Linear scale", + subtitle = "Background invisible; cluster dominates", + pt_color = "white") + +plot_field(res_w, tc_w, + log_scale = TRUE, + title = "9b. log1p scale", + subtitle = "Background and cluster both visible", + palette = "viridis", pt_color = "white") + +plot_annotation( + title = "Section 9: Wide Dynamic Range", + subtitle = "Use log_scale=TRUE when max/min ratio > ~20", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +--- + +# Section 10 -- Two-gene ratio map + +A `log2(A/B)` ratio map shows spatial domains of ligand/receptor balance. + +```{r sec10} +ph <- list(D = 1, lambda = 0.25, production_rate = 1, + method = "fd", fd_solver = "direct", + grid_resolution = 256L, verbose = FALSE) + +res_gA <- do.call(estimate_concentration_field, + c(list(mask = tissue_mask, transcript_coords = tc_A), ph)) +res_gB <- do.call(estimate_concentration_field, + c(list(mask = tissue_mask, transcript_coords = tc_B), ph)) + +res_rt <- res_gA +res_rt$field <- log2((res_gA$field + 1e-6) / (res_gB$field + 1e-6)) +``` + +```{r sec10-plot, fig.height=8} +p_ratio <- plot_field(res_rt, + title = "10c. log2(A/B) ratio", + subtitle = "Red=A territory | Blue=B territory | White=balanced", + symmetric = TRUE, show_contours = TRUE, n_contours = 5, + show_pts = FALSE, fill_label = "log2(A/B)") + + geom_point(data = tc_A, aes(x = x, y = y), + color = "#e74c3c", size = 0.3, alpha = 0.4, + inherit.aes = FALSE) + + geom_point(data = tc_B, aes(x = x, y = y), + color = "#27ae60", size = 0.3, alpha = 0.4, + inherit.aes = FALSE) + +p_dens <- ggplot(field_to_df(res_rt), aes(x = field, + fill = ifelse(field < 0, "B dominant", "A dominant"))) + + geom_density(alpha = 0.55) + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + + scale_fill_manual(values = c("B dominant" = "#2980b9", + "A dominant" = "#c0392b"), name = NULL) + + labs(title = "10d. Distribution of log2(A/B)", + subtitle = "Fraction of tissue in each territory", + x = "log2(A/B)", y = "Density") + + theme_demo() + +(plot_field(res_gA, tc_A, title = "10a. Gene A field") + + plot_field(res_gB, tc_B, title = "10b. Gene B field", palette = "viridis")) / +(p_ratio + p_dens) + +plot_annotation( + title = "Section 10: Two-Gene Ratio Map", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +--- + +# Section 11 -- 100k transcripts and solver timing + +```{r sec11, message=TRUE} +tc_100k <- sample_in_mask(tissue_mask, 100000) +cat(sprintf("Sampled %d transcripts.\n", nrow(tc_100k))) + +t1 <- system.time(r1 <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_100k, + D = 1, lambda = 0.3, method = "fd", fd_solver = "direct", + grid_resolution = 256L, verbose = TRUE)) + +t2 <- system.time(r2 <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_100k, + D = 1, lambda = 0.3, method = "fd", fd_solver = "iterative", + sor_omega = 1.75, grid_resolution = 256L, verbose = TRUE)) + +t3 <- system.time(r3 <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_100k, + D = 1, lambda = 0.3, method = "green", + grid_resolution = 256L, verbose = TRUE)) +``` + +```{r sec11-table} +knitr::kable(data.frame( + Solver = c("fd/direct (N=256)", "fd/SOR (N=256)", "green/FFT (N=256)"), + Time_s = round(c(t1["elapsed"], t2["elapsed"], t3["elapsed"]), 2), + Has_BCs = c("Yes", "Yes (Dirichlet only)", "No"), + Handles_holes = c("Yes", "Yes", "No"), + Notes = c("Sparse LU; recommended up to N=400", + "Iterative; use for N > 400", + "Fastest; infinite domain only") +)) +``` + +```{r sec11-plot, fig.height=4} +sub <- tc_100k[sample(nrow(tc_100k), 2000), ] +plot_field(r1, sub, + title = sprintf("11a. fd/direct (%.1f s)", t1["elapsed"]), + subtitle = "N=256 | sparse LU", + pt_size = 0.15, pt_alpha = 0.2) + +plot_field(r2, sub, + title = sprintf("11b. fd/SOR (%.1f s)", t2["elapsed"]), + subtitle = "N=256 | iterative", + palette = "plasma", pt_size = 0.15, pt_alpha = 0.2) + +plot_annotation( + title = "Section 11: 100k Transcripts -- Solver Comparison", + subtitle = "2,000 of 100,000 points shown", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +--- + +# Section 12 -- Quantitative field extraction + +```{r sec12} +res_q <- res_fd + +interpolate_field <- function(result, query_pts) { + xg <- result$x; yg <- result$y; f <- result$field + vapply(seq_len(nrow(query_pts)), function(i) { + qx <- query_pts$x[i]; qy <- query_pts$y[i] + xi <- findInterval(qx, xg); yi <- findInterval(qy, yg) + if (xi < 1 || xi >= length(xg) || yi < 1 || yi >= length(yg)) + return(NA_real_) + wx <- (qx - xg[xi]) / (xg[xi+1] - xg[xi]) + wy <- (qy - yg[yi]) / (yg[yi+1] - yg[yi]) + (1-wx)*(1-wy)*f[xi,yi] + wx*(1-wy)*f[xi+1,yi] + + (1-wx)*wy *f[xi,yi+1] + wx*wy *f[xi+1,yi+1] + }, numeric(1)) +} +``` + +```{r sec12-queries} +queries <- data.frame( + label = c("Cluster core", "Cluster edge", "Tissue centre", + "Far right", "Near notch"), + x = c(-4.5, -3.0, 0.0, 5.0, 3.0), + y = c( 3.8, 2.5, 0.0, 0.0, 0.0)) +queries$concentration <- interpolate_field(res_q, queries) +knitr::kable(queries, digits = 5) +``` + +```{r sec12-stats} +df_q <- field_to_df(res_q) + +cat(sprintf("Mean concentration:\n Left half (x < 0): %.5f\n Right half (x >= 0): %.5f\n", + mean(df_q$field[df_q$x < 0 & !is.na(df_q$field)]), + mean(df_q$field[df_q$x >= 0 & !is.na(df_q$field)]))) + +pk <- which.max(res_q$field) +N <- length(res_q$x) +cat(sprintf("\nPeak: x=%.2f, y=%.2f, value=%.6f\n", + res_q$x[((pk-1L) %% N) + 1L], + res_q$y[((pk-1L) %/% N) + 1L], + res_q$field[pk])) + +total <- sum(res_q$field[res_q$mask], na.rm = TRUE) * res_q$hx * res_q$hy +theory <- nrow(tc_A) * 1.0 / 0.3 +cat(sprintf("\nField integral: %.4f\nTheory (n * r / lambda): %.4f\nAgreement: %.1f%%\n", + total, theory, 100 * (1 - abs(total - theory) / theory))) +``` + +```{r sec12-profile, fig.height=3.5} +prof <- df_q[abs(df_q$y) < res_q$hy * 1.5 & !is.na(df_q$field), ] +ggplot(prof[order(prof$x), ], aes(x = x, y = field)) + + geom_line(color = "#e74c3c", linewidth = 0.8) + + geom_area(fill = "#e74c3c", alpha = 0.15) + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") + + labs(title = "12e. Concentration profile along y = 0", + subtitle = "Cluster peak at x ~ -4.5; near-zero at right edge", + x = "X", y = "Concentration") + + theme_demo() +``` + +--- + +# Parameter quick-reference + +``` +estimate_concentration_field() -- parameter guide +-------------------------------------------------------------------- + +PHYSICS + diffusion_length Primary tuning knob. Set directly: L = 0.5 to 10 + Rule: L ~ 10-20% of inter-cluster distance + D Affects amplitude only (amplitude proportional to 1/D) + lambda Affects amplitude only; lambda = D / L^2 + production_rate Global amplitude scaling; use weight_col for per-cell + +SOLVER + method = "fd" Default. Use whenever you need accurate BCs or holes. + method = "green" Faster for sweeps; ignores domain shape. + method = "kde" Quick baseline; no physics. + + fd_solver = "direct" N <= 400 (sparse LU decomposition) + fd_solver = "iterative" N > 400 (red-black SOR) + + boundary_condition = "dirichlet" C=0 at boundary. Default. + boundary_condition = "neumann" dC/dn=0. Sealed system. + + grid_resolution = 256 Explore. 512 for publication quality. + +WEIGHTS + weight_col = "umi" Use UMI counts as source strength. + +POST-PROCESSING + log_transform = TRUE Apply log1p() after solving. + normalize = TRUE Rescale to [0,1] for comparison. + clip_negative = TRUE Remove small numerical negatives. Default on. +-------------------------------------------------------------------- +``` + +--- + +# Session information + +```{r session-info} +sessionInfo() +``` diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd new file mode 100644 index 0000000..69d46ff --- /dev/null +++ b/vignettes/getting-started.Rmd @@ -0,0 +1,167 @@ +--- +title: "Getting Started with TissueField" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Getting Started with TissueField} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + echo = TRUE, + warning = FALSE, + message = FALSE, + fig.align = "center", + fig.width = 7, + fig.height = 4.5 +) +``` + +## What is a concentration field? + +Spatial transcriptomics gives us discrete detections: a list of (x, y) coordinates where individual mRNA molecules were found. But many biological processes operate through continuous gradients — ligand concentrations, cytokine fields, metabolite distributions. **TissueField** bridges that gap. + +Given a set of transcript coordinates and a tissue mask polygon, `estimate_concentration_field()` computes the **steady-state concentration** that those transcripts would produce as diffusing point sources inside the tissue: + +$$D \nabla^2 C(\mathbf{x}) - \lambda C(\mathbf{x}) + s(\mathbf{x}) = 0$$ + +The output is a dense numeric matrix — a spatial field — defined over the tissue with `NA` outside it. + +The single most important parameter is the **diffusion length**: + +$$L = \sqrt{D / \lambda}$$ + +$L$ is roughly the radius of influence of one transcript. Set it to match the biological scale of interest. + +--- + +## A first example + +We start by creating a simple circular tissue and placing synthetic transcripts inside it. + +```{r tissue} +library(TissueField) +library(sf) +library(ggplot2) + +set.seed(2024) + +# Circular mask (radius = 10, centre = origin) +theta <- seq(0, 2 * pi, length.out = 201) +circle_xy <- cbind(10 * cos(theta), 10 * sin(theta)) +circle_xy[201, ] <- circle_xy[1, ] # close exactly (avoid floating point gap) +mask <- st_sfc(st_polygon(list(circle_xy))) + +# 200 transcripts: focal cluster at (-4, 3) and uniform background +tc_cluster <- data.frame(x = rnorm(120, -4, 1.5), y = rnorm(120, 3, 1.2)) +tc_bg <- data.frame(x = runif(80, -9, 9), y = runif(80, -9, 9)) +# keep only those inside the mask +inside_fn <- function(d) { + as.logical(st_within(st_as_sf(d, coords = c("x","y")), mask, sparse = FALSE)[,1]) +} +tc <- rbind(tc_cluster[inside_fn(tc_cluster), ], tc_bg[inside_fn(tc_bg), ]) +``` + +### Compute the concentration field + +Call `estimate_concentration_field()` with a moderate diffusion length. Here we set `L = 3` directly using the `diffusion_length` argument, which is the simplest way to control spatial scale. + +```{r solve, message=TRUE} +res <- estimate_concentration_field( + mask = mask, + transcript_coords = tc, + diffusion_length = 3, # L = 3 spatial units + D = 1, # sets amplitude (shape depends only on L) + method = "fd", # finite-difference, recommended + grid_resolution = 128L, + verbose = TRUE +) +``` + +### Inspect the return value + +```{r inspect} +names(res) +dim(res$field) # N x N matrix +res$params$diffusion_length +res$diagnostics +``` + +### Visualise with `field_to_df()` and ggplot2 + +`field_to_df()` unpacks the matrix into a tidy data frame: + +```{r plot-manual, fig.height=5} +df <- field_to_df(res) # inside_only = TRUE by default +head(df) + +ggplot(df, aes(x = x, y = y, fill = field)) + + geom_raster(interpolate = TRUE) + + geom_point(data = tc, aes(x = x, y = y), + inherit.aes = FALSE, color = "#00e5ff", + size = 0.6, alpha = 0.6) + + scale_fill_viridis_c(option = "magma", name = "Conc.") + + coord_equal() + theme_minimal(base_size = 12) + + labs(title = "Concentration field (L = 3)", + subtitle = "Cyan points = transcript locations") +``` + +### Or use the built-in plot function + +`plot_concentration_field()` wraps the same ggplot2 code with sensible defaults: + +```{r plot-builtin, fig.height=5} +plot_concentration_field(res, transcript_coords = tc, + show_contours = TRUE, n_contours = 6) +``` + +--- + +## Understanding diffusion length + +The field shape is entirely determined by $L$. Use `sweep_diffusion_length()` to compare several values at once: + +```{r sweep, fig.height=5, fig.width=9} +library(patchwork) + +L_vals <- c(0.8, 2, 5, 12) +plots <- lapply(L_vals, function(Lv) { + res_l <- estimate_concentration_field( + mask, tc, diffusion_length = Lv, D = 1, + method = "fd", grid_resolution = 128L, verbose = FALSE + ) + df_l <- field_to_df(res_l) + # normalise each panel independently for visual comparison + rng <- range(df_l$field, na.rm = TRUE) + df_l$field_norm <- (df_l$field - rng[1]) / diff(rng) + ggplot(df_l, aes(x = x, y = y, fill = field_norm)) + + geom_raster(interpolate = TRUE) + + scale_fill_viridis_c(option = "magma", limits = c(0, 1), + name = "Norm.\nConc.", na.value = "transparent") + + coord_equal() + theme_minimal(base_size = 9) + + labs(title = sprintf("L = %g", Lv), x = NULL, y = NULL) +}) + +wrap_plots(plots, nrow = 1) + + plot_annotation( + title = "Effect of diffusion length (each panel normalised independently)", + subtitle = "Small L: tight peaks around sources | Large L: tissue-wide field", + theme = theme(plot.title = element_text(face = "bold", size = 12), + plot.subtitle = element_text(size = 9, color = "grey40")) + ) +``` + +**Practical guidance:** + +- Set $L$ to **10--20% of the typical inter-cluster distance** to get fields that peak at sources and decay between them. +- Larger $L$ produces smoother, more "territorial" fields where influence extends across the whole tissue. +- $D$ only affects the amplitude ($C \propto 1/D$), not the spatial pattern. See `vignette("parameter-tuning")` for details. + +--- + +## Next steps + +- `vignette("solver-guide")`: compare the three solvers (`fd`, `green`, `kde`) and boundary conditions. +- `vignette("parameter-tuning")`: deep dive into $L$, $D$, $\lambda$, UMI weighting, and grid resolution. +- `vignette("complete-reference")`: full 12-section reference covering all features. diff --git a/vignettes/parameter-tuning.Rmd b/vignettes/parameter-tuning.Rmd new file mode 100644 index 0000000..e8eb839 --- /dev/null +++ b/vignettes/parameter-tuning.Rmd @@ -0,0 +1,390 @@ +--- +title: "Parameter Tuning Guide" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Parameter Tuning Guide} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + echo = TRUE, + warning = FALSE, + message = FALSE, + fig.align = "center", + fig.width = 8, + fig.height = 4.5 +) +``` + +```{r libs} +library(TissueField) +library(sf) +library(ggplot2) +library(patchwork) +set.seed(2024) +``` + +This vignette covers the parameters that most strongly affect the output of `estimate_concentration_field()`. They are ordered from most impactful to least: + +1. **Diffusion length $L$** — controls spatial scale (shape) +2. **$D$ and $\lambda$ independently** — control amplitude +3. **`weight_col` (UMI weighting)** — rescales source strength per transcript +4. **`include_external`** — whether transcripts outside the mask contribute +5. **`grid_resolution`** — accuracy vs. speed +6. **`normalize` and `log_transform`** — output rescaling + +--- + +## Shared setup + +```{r setup-tissue} +# Kidney-bean mask +in_bean <- function(x, y) + (x/9)^2 + (y/7)^2 < 1 & sqrt((x - 4.5)^2 + y^2) >= 3.5 + +cands <- data.frame(x = runif(20000, -11, 11), y = runif(20000, -9, 9)) +tissue_pts <- cands[in_bean(cands$x, cands$y), ][1:3000, ] + +if (requireNamespace("TissueMask", quietly = TRUE)) { + mask <- TissueMask::fit_spatial_mask( + tissue_pts, method = "raster", + raster_resolution = 200L, raster_sigma = 0.8, + raster_threshold = 0.15, verbose = FALSE + ) +} else { + pts_sf <- st_as_sf(tissue_pts, coords = c("x", "y")) + mask <- st_convex_hull(st_union(pts_sf)) +} +mu <- st_union(mask) + +inside_fn <- function(d) + as.logical(st_within(st_as_sf(d, coords = c("x","y")), mu, sparse = FALSE)[,1]) + +tc_cl <- data.frame(x = rnorm(280, -3, 1.2), y = rnorm(280, 3.5, 0.9)) +tc_bg_all <- data.frame(x = runif(600, -10, 10), y = runif(600, -8, 8)) +tc <- rbind(tc_cl[inside_fn(tc_cl), ], + tc_bg_all[inside_fn(tc_bg_all), ][1:80, ]) +tc$umi <- c(rpois(sum(inside_fn(tc_cl)), 18), + rpois(80, 3)) +``` + +--- + +## 1. Diffusion length L + +$L = \sqrt{D/\lambda}$ is the single most important parameter. It determines **how far a transcript's influence extends** in the tissue. + +Use `diffusion_length` to set $L$ directly, bypassing the $D$/$\lambda$ decomposition: + +```{r L-sweep, fig.height=9, fig.width=8} +L_vals <- c(0.5, 1.5, 3.5, 7.0, 12.0, 20.0) + +L_plots <- lapply(L_vals, function(Lv) { + r <- estimate_concentration_field( + mask, tc, diffusion_length = Lv, D = 1, + method = "fd", fd_solver = "direct", + grid_resolution = 128L, verbose = FALSE + ) + df <- field_to_df(r) + rng <- range(df$field, na.rm = TRUE) + df$fn <- (df$field - rng[1]) / diff(rng) + ggplot(df, aes(x = x, y = y, fill = fn)) + + geom_raster(interpolate = TRUE) + + scale_fill_viridis_c(option = "magma", limits = c(0, 1), + name = "Norm.\nConc.", na.value = "transparent") + + coord_equal() + theme_minimal(base_size = 8) + + labs(title = sprintf("L = %g", Lv), + subtitle = sprintf("lambda = %.4f", 1/Lv^2), + x = NULL, y = NULL) +}) + +wrap_plots(L_plots, nrow = 2) + + plot_annotation( + title = "Diffusion Length Sweep (normalised per panel)", + subtitle = "Small L: tight peaks | Large L: tissue-wide field", + theme = theme(plot.title = element_text(face = "bold", size = 12), + plot.subtitle = element_text(size = 9, color = "grey40")) + ) +``` + +```{r L-peak-table, echo=FALSE} +peaks <- sapply(L_vals, function(Lv) { + r <- estimate_concentration_field( + mask, tc, diffusion_length = Lv, D = 1, + method = "fd", fd_solver = "direct", + grid_resolution = 128L, verbose = FALSE + ) + max(r$field, na.rm = TRUE) +}) +knitr::kable( + data.frame(L = L_vals, lambda = round(1/L_vals^2, 5), + peak_concentration = signif(peaks, 4)), + caption = "Peak concentration vs diffusion length (D=1 throughout)" +) +``` + +**How to choose L in practice:** + +| Scenario | Suggested L | +|---|---| +| Tight spatial domains, distinguish adjacent clusters | 10--20% of inter-cluster distance | +| Moderate-range signalling (e.g. paracrine cytokines) | 20--50% of tissue diameter | +| Tissue-wide gradient (e.g. oxygen, morphogens) | > 50% of tissue diameter | +| Quick exploration | Try L = 1, 3, 10 and compare | + +> **Shortcut**: use `sweep_diffusion_length(c(1, 3, 10), mask, tc)` to get a combined data frame for all three values. + +--- + +## 2. D and lambda: shape vs amplitude + +**The spatial shape of the field depends only on $L = \sqrt{D/\lambda}$.** Changing $D$ and $\lambda$ while keeping their ratio constant produces identical spatial patterns — only the amplitude changes, scaling as $C \propto 1/D$. + +```{r DL-fixed, fig.height=4, fig.width=9} +# L = 2 throughout: D * lambda = 1/L^2 = 0.25 +dl_cases <- list( + list(D = 0.1, lam = 0.025, lbl = "D=0.1, lam=0.025"), + list(D = 1.0, lam = 0.25, lbl = "D=1, lam=0.25"), + list(D = 5.0, lam = 1.25, lbl = "D=5, lam=1.25"), + list(D = 20.0, lam = 5.0, lbl = "D=20, lam=5") +) + +dl_plots <- lapply(dl_cases, function(co) { + r <- estimate_concentration_field( + mask, tc, D = co$D, lambda = co$lam, + method = "fd", fd_solver = "direct", + grid_resolution = 128L, verbose = FALSE + ) + pk <- signif(max(r$field, na.rm = TRUE), 3) + df <- field_to_df(r) + ggplot(df, aes(x = x, y = y, fill = field)) + + geom_raster(interpolate = TRUE) + + scale_fill_viridis_c(option = "magma", name = "Conc.", + na.value = "transparent") + + coord_equal() + theme_minimal(base_size = 8) + + labs(title = co$lbl, + subtitle = sprintf("L=2 fixed | peak=%g", pk), + x = NULL, y = NULL) +}) + +wrap_plots(dl_plots, nrow = 1) + + plot_annotation( + title = "Fixed L=2, Varying D and lambda", + subtitle = "SHAPE is identical. Amplitude scales as 1/D.", + theme = theme(plot.title = element_text(face = "bold", size = 12), + plot.subtitle = element_text(size = 9, color = "grey40")) + ) +``` + +**Practical implication:** When fitting to data, first determine $L$ (the spatial scale), then fix $D$ based on biophysical priors (or leave at 1 if only relative concentrations matter). $\lambda$ is then determined as $\lambda = D/L^2$. + +--- + +## 3. UMI weighting (weight_col) + +Each transcript can carry a per-molecule count (UMI) that scales its contribution to the source term. Pass the column name to `weight_col`: + +```{r umi-run} +# Without weighting +res_unweighted <- estimate_concentration_field( + mask, tc, D = 1, lambda = 0.2, + method = "fd", grid_resolution = 128L, verbose = FALSE +) + +# With UMI weighting +res_weighted <- estimate_concentration_field( + mask, tc, D = 1, lambda = 0.2, weight_col = "umi", + method = "fd", grid_resolution = 128L, verbose = FALSE +) +``` + +```{r umi-plot, fig.height=4} +panel2 <- function(result, title = "") { + df <- field_to_df(result) + ggplot(df, aes(x = x, y = y, fill = field)) + + geom_raster(interpolate = TRUE) + + scale_fill_viridis_c(option = "magma", name = "Conc.", + na.value = "transparent") + + coord_equal() + theme_minimal(base_size = 9) + + labs(title = title, x = "X", y = "Y") + + theme(plot.title = element_text(face = "bold")) +} + +(panel2(res_unweighted, "Unweighted (all transcripts = 1)") + + panel2(res_weighted, "UMI-weighted (cluster UMI ~18, bg ~3)")) + +plot_annotation( + title = "Effect of UMI Weighting", + theme = theme(plot.title = element_text(face = "bold", size = 12)) +) +``` + +```{r umi-stats, echo=FALSE} +knitr::kable(data.frame( + Version = c("Unweighted", "UMI-weighted"), + Peak = signif(c(max(res_unweighted$field, na.rm = TRUE), + max(res_weighted$field, na.rm = TRUE)), 4), + Total_source = c(sum(res_unweighted$sources, na.rm = TRUE), + sum(res_weighted$sources, na.rm = TRUE)) +), caption = "Peak concentration and total source strength") +``` + +**When to use `weight_col`:** When your data has UMI counts that reflect true mRNA abundance differences. Unweighted fields treat each detected molecule equally; weighted fields amplify contribution from high-UMI transcripts (often genuine high-expressers). + +--- + +## 4. External transcripts (include_external) + +By default, transcripts outside the mask are discarded. Setting `include_external = TRUE` includes them as sources, which can create elevated concentration near the tissue boundary from external signal. + +```{r external-run} +# Add some transcripts outside the mask +tc_ext <- rbind(tc, + data.frame(x = c(-12, -11, 13), y = c(0, 5, -2), umi = c(50, 40, 30))) + +res_excl <- estimate_concentration_field( + mask, tc_ext, D = 1, lambda = 0.2, + include_external = FALSE, + grid_resolution = 128L, verbose = FALSE +) + +res_incl <- estimate_concentration_field( + mask, tc_ext, D = 1, lambda = 0.2, + include_external = TRUE, + grid_resolution = 128L, verbose = FALSE +) +``` + +```{r external-plot, fig.height=4} +(panel2(res_excl, "include_external = FALSE (default)") + + panel2(res_incl, "include_external = TRUE")) + +plot_annotation( + title = "Effect of include_external", + subtitle = "External sources can elevate concentration near edges", + theme = theme(plot.title = element_text(face = "bold", size = 12), + plot.subtitle = element_text(size = 9, color = "grey40")) +) +``` + +--- + +## 5. Grid resolution + +`grid_resolution` sets the number of cells per axis ($N \times N$). Higher resolution gives smoother, more accurate fields but increases memory and solve time. + +```{r resolution-run} +res_times <- sapply(c(64L, 128L, 256L, 512L), function(N) { + t0 <- proc.time()["elapsed"] + estimate_concentration_field( + mask, tc, D = 1, lambda = 0.2, + method = "fd", fd_solver = "direct", + grid_resolution = N, verbose = FALSE + ) + proc.time()["elapsed"] - t0 +}) +``` + +```{r resolution-table, echo=FALSE} +knitr::kable(data.frame( + N = c(64, 128, 256, 512), + Grid_cells = c(64, 128, 256, 512)^2, + Time_s = round(res_times, 2) +), caption = "Wall time vs grid resolution (direct solver)") +``` + +```{r resolution-plot, fig.height=3.5, fig.width=9} +res_low <- estimate_concentration_field( + mask, tc, D=1, lambda=0.2, method="fd", + grid_resolution = 32L, verbose = FALSE +) +res_med <- estimate_concentration_field( + mask, tc, D=1, lambda=0.2, method="fd", + grid_resolution = 128L, verbose = FALSE +) +res_high <- estimate_concentration_field( + mask, tc, D=1, lambda=0.2, method="fd", + grid_resolution = 256L, verbose = FALSE +) + +(panel2(res_low, "N = 32") + + panel2(res_med, "N = 128") + + panel2(res_high, "N = 256")) + +plot_annotation( + title = "Grid Resolution Comparison", + theme = theme(plot.title = element_text(face = "bold", size = 12)) +) +``` + +**Practical guidance:** + +- `N = 64--128`: fast exploration, reasonable accuracy +- `N = 256`: good balance; default +- `N = 512+`: high-resolution publication figures; use `fd_solver = "iterative"` or `method = "green"` to manage memory + +--- + +## 6. Normalise and log-transform + +Two output rescaling options: + +- `normalize = TRUE`: linearly rescales the field to $[0, 1]$. Useful when comparing fields across genes with different expression levels. +- `log_transform = TRUE`: applies `log1p()` after solving. Compresses wide dynamic range (useful for highly focal sources). + +```{r transform-plot, fig.height=4} +res_raw <- estimate_concentration_field( + mask, tc, D = 1, lambda = 0.2, grid_resolution = 128L, verbose = FALSE +) +res_norm <- estimate_concentration_field( + mask, tc, D = 1, lambda = 0.2, normalize = TRUE, + grid_resolution = 128L, verbose = FALSE +) +res_log <- estimate_concentration_field( + mask, tc, D = 1, lambda = 0.2, log_transform = TRUE, + grid_resolution = 128L, verbose = FALSE +) + +(panel2(res_raw, "Raw field") + + panel2(res_norm, "normalize = TRUE (field in [0,1])") + + panel2(res_log, "log_transform = TRUE (log1p applied)")) + +plot_annotation( + title = "Output Rescaling Options", + theme = theme(plot.title = element_text(face = "bold", size = 12)) +) +``` + +```{r transform-ranges, echo=FALSE} +knitr::kable(data.frame( + Setting = c("Raw", "normalize=TRUE", "log_transform=TRUE"), + Min = round(sapply(list(res_raw, res_norm, res_log), + function(r) min(r$field, na.rm = TRUE)), 5), + Max = round(sapply(list(res_raw, res_norm, res_log), + function(r) max(r$field, na.rm = TRUE)), 5) +), caption = "Field range under different output options") +``` + +--- + +## Parameter quick-reference + +```{r quickref, echo=FALSE} +qr <- data.frame( + Parameter = c("diffusion_length", "D", "lambda", "weight_col", + "include_external", "grid_resolution", "normalize", + "log_transform", "boundary_condition", "fd_solver"), + Controls = c("Spatial scale of field (shape)", + "Amplitude (C proportional to 1/D)", + "Clearance rate (determines L with D)", + "Per-transcript source weight", + "Whether to use off-mask transcripts", + "Grid density (N x N cells)", + "Rescale output to [0, 1]", + "Apply log1p() to output", + "Dirichlet (absorbing) or Neumann (reflecting)", + "Direct (LU) or iterative (SOR)"), + Default = c("NULL (computed from D and lambda)", "1.0", "0.1", + "NULL", "FALSE", "256", "FALSE", "FALSE", + "dirichlet", "direct") +) +knitr::kable(qr) +``` diff --git a/vignettes/solver-guide.Rmd b/vignettes/solver-guide.Rmd new file mode 100644 index 0000000..c67cab5 --- /dev/null +++ b/vignettes/solver-guide.Rmd @@ -0,0 +1,318 @@ +--- +title: "Solver Guide: fd, green, and kde" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Solver Guide: fd, green, and kde} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + echo = TRUE, + warning = FALSE, + message = FALSE, + fig.align = "center", + fig.width = 8, + fig.height = 5 +) +``` + +```{r libs} +library(TissueField) +library(sf) +library(ggplot2) +library(patchwork) +set.seed(2024) +``` + +TissueField provides three solvers selected via the `method` argument: + +| Method | Algorithm | Boundary conditions | Speed | Best for | +|--------|-----------|---------------------|-------|----------| +| `"fd"` | Finite-difference sparse linear system | Exact (Dirichlet or Neumann) | Fast for N <= 400 | **Default; recommended** | +| `"green"` | FFT convolution with analytic $K_0(\kappa r)$ | None (infinite domain) | Fast for all N | Parameter sweeps, large grids | +| `"kde"` | Gaussian kernel density | None | Very fast | Quick visualisation | + +--- + +## Shared tissue setup + +```{r setup-tissue} +# Kidney-bean shaped tissue +in_bean <- function(x, y) + (x/9)^2 + (y/7)^2 < 1 & sqrt((x - 4.5)^2 + y^2) >= 3.5 + +cands <- data.frame(x = runif(20000, -11, 11), y = runif(20000, -9, 9)) +tissue_pts <- cands[in_bean(cands$x, cands$y), ][1:3000, ] + +# Build mask with raster method (requires TissueMask) +if (requireNamespace("TissueMask", quietly = TRUE)) { + mask <- TissueMask::fit_spatial_mask( + tissue_pts, method = "raster", + raster_resolution = 200L, raster_sigma = 0.8, + raster_threshold = 0.15, verbose = FALSE + ) +} else { + # Fallback: convex hull via sf + pts_sf <- st_as_sf(tissue_pts, coords = c("x", "y")) + mask <- st_convex_hull(st_union(pts_sf)) +} + +# Transcripts: cluster + background +mu <- st_union(mask) +inside_pts <- function(d) { + as.logical(st_within(st_as_sf(d, coords = c("x","y")), mu, sparse = FALSE)[,1]) +} + +tc_cl <- data.frame(x = rnorm(300, -3.5, 1.3), y = rnorm(300, 3.2, 0.9)) +tc_bg_all <- data.frame(x = runif(800, -10, 10), y = runif(800, -8, 8)) +tc_bg <- tc_bg_all[inside_pts(tc_bg_all), ][1:100, ] +tc <- rbind(tc_cl[inside_pts(tc_cl), ], tc_bg) +``` + +--- + +## 1. Solver comparison: fd vs green vs kde + +All three methods receive the same physics ($D=1$, $\lambda=0.3$, $L \approx 1.83$): + +```{r solver-run} +base_args <- list( + mask = mask, + transcript_coords = tc, + D = 1, lambda = 0.3, + grid_resolution = 128L, + verbose = FALSE +) + +res_fd <- do.call(estimate_concentration_field, + c(base_args, list(method = "fd", fd_solver = "direct", + boundary_condition = "dirichlet"))) + +res_green <- do.call(estimate_concentration_field, + c(base_args, list(method = "green"))) + +res_kde <- do.call(estimate_concentration_field, + c(base_args, list(method = "kde", + kde_bandwidth = sqrt(1 / 0.3)))) # bandwidth = L + +# fd - green difference map +res_diff <- res_fd +res_diff$field <- res_fd$field - res_green$field +``` + +```{r solver-plot, fig.height=9, fig.width=8} +# Helper to make a panel +panel <- function(result, tc_pts = NULL, title = "", subtitle = "", + palette = "magma", symmetric = FALSE) { + df <- field_to_df(result) + fc <- "field" + p <- ggplot(df, aes(x = x, y = y, fill = .data[[fc]])) + + geom_raster(interpolate = TRUE) + coord_equal() + + geom_contour(aes(z = field), color = "white", + alpha = 0.3, linewidth = 0.25, bins = 6) + + labs(title = title, subtitle = subtitle, x = "X", y = "Y") + + theme_minimal(base_size = 9) + + theme(plot.title = element_text(face = "bold"), + plot.subtitle = element_text(color = "grey40", size = 8)) + if (symmetric) { + lim <- max(abs(df$field), na.rm = TRUE) + p + scale_fill_distiller(palette = "RdBu", limits = c(-lim, lim), + name = "fd-green", na.value = "transparent") + } else { + p + scale_fill_viridis_c(option = palette, name = "Conc.", + na.value = "transparent") + } +} + +(panel(res_fd, title = "1a. fd (Dirichlet)", + subtitle = "Sparse linear system; exact BCs") + + panel(res_green, title = "1b. green (FFT)", + subtitle = "Infinite domain; no boundary effects")) / +(panel(res_kde, title = "1c. kde", + subtitle = paste0("Gaussian smoothing; bandwidth = L = ", + round(sqrt(1/0.3), 2))) + + panel(res_diff, title = "1d. fd minus green", + subtitle = "fd suppresses concentration near boundary", + symmetric = TRUE)) + +plot_annotation( + title = "Solver Comparison (D=1, lambda=0.3, L approx 1.83)", + theme = theme(plot.title = element_text(face = "bold", size = 12)) +) +``` + +**Key observations:** + +- **Panels 1a vs 1b**: The interior fields are nearly identical — both faithfully solve the screened Poisson PDE. The `fd` and `green` solvers agree well away from the boundary. +- **Panel 1d (difference)**: `fd` correctly drives concentration to zero at the tissue edge (Dirichlet). The `green` solver ignores the boundary and leaks concentration outward. Near-edge sources show the largest disagreement. +- **Panel 1c (kde)**: Gaussian smoothing produces a visually similar pattern but has no physical interpretation and no boundary enforcement. + +--- + +## 2. Boundary conditions: Dirichlet vs Neumann + +The `fd` solver supports two boundary conditions controlled by `boundary_condition`: + +- **`"dirichlet"`** (default): $C = 0$ at the tissue boundary. Molecules are absorbed at or degrade at the edge. Most appropriate for a histological section where the tissue edge is a hard barrier. +- **`"neumann"`**: $\partial C / \partial n = 0$ — zero normal flux; molecules are reflected at the boundary. Appropriate when modelling a sealed container or when the tissue edge is not a sink. + +```{r bc-run} +res_dir <- estimate_concentration_field( + mask, tc, D = 1, lambda = 0.2, + method = "fd", fd_solver = "direct", + boundary_condition = "dirichlet", + grid_resolution = 128L, verbose = FALSE +) + +res_neu <- estimate_concentration_field( + mask, tc, D = 1, lambda = 0.2, + method = "fd", fd_solver = "direct", + boundary_condition = "neumann", + grid_resolution = 128L, verbose = FALSE +) + +res_bcd <- res_dir +res_bcd$field <- res_neu$field - res_dir$field +``` + +```{r bc-plot, fig.height=4} +(panel(res_dir, title = "2a. Dirichlet (C = 0 at boundary)", + subtitle = "Default; molecules absorbed at edge") + + panel(res_neu, title = "2b. Neumann (zero flux at boundary)", + subtitle = "Molecules reflected; elevated near edge")) + +plot_annotation( + title = "Boundary Condition Comparison (D=1, lambda=0.2, L approx 2.24)", + theme = theme(plot.title = element_text(face = "bold", size = 12)) +) +``` + +```{r bc-crosssection, fig.height=3.5} +# Cross-section at y approx 0 +sl_fun <- function(res, lbl) { + df <- field_to_df(res) + s <- df[abs(df$y) < res$hy * 2 & !is.na(df$field), ] + s <- s[order(s$x), ] + s$BC <- lbl + s +} +sl_all <- rbind(sl_fun(res_dir, "Dirichlet"), sl_fun(res_neu, "Neumann")) + +ggplot(sl_all, aes(x = x, y = field, color = BC)) + + geom_line(linewidth = 0.9) + + scale_color_manual(values = c("Dirichlet" = "#e74c3c", "Neumann" = "#2980b9")) + + theme_minimal(base_size = 11) + + labs(title = "Cross-section at y = 0", + subtitle = "Dirichlet drops to 0 at edges; Neumann stays elevated", + x = "X", y = "Concentration", color = "BC") +``` + +**When does the choice matter?** When sources are near the tissue edge, Dirichlet can substantially reduce the predicted local concentration compared to Neumann. For sources in the tissue interior, the two BCs give nearly the same field. + +--- + +## 3. fd_solver: direct vs iterative (SOR) + +The `fd` method offers two solvers: + +- **`"direct"`** (default): LU factorisation of the sparse matrix via `Matrix::solve()`. Exact and fast for grids up to ~400 x 400. Memory cost scales as $O(K^{1.5})$ for a K-cell system. +- **`"iterative"`**: Red-black Successive Over-Relaxation (SOR). Converges to within `sor_tol` in `sor_max_iter` iterations. Lower peak memory; better for very large grids. + +```{r sor-run, message=TRUE} +res_dir_v <- estimate_concentration_field( + mask, tc, D = 1, lambda = 0.2, + method = "fd", fd_solver = "direct", + grid_resolution = 128L, verbose = TRUE +) + +res_sor_v <- estimate_concentration_field( + mask, tc, D = 1, lambda = 0.2, + method = "fd", fd_solver = "iterative", + sor_omega = 1.7, sor_tol = 1e-6, + grid_resolution = 128L, verbose = TRUE +) +``` + +```{r sor-compare, fig.height=4} +res_sor_diff <- res_dir_v +res_sor_diff$field <- res_dir_v$field - res_sor_v$field + +(panel(res_dir_v, title = "3a. fd / direct", + subtitle = sprintf("solve time: %.2fs", + res_dir_v$diagnostics$elapsed_solve_s)) + + panel(res_sor_v, title = "3b. fd / iterative (SOR)", + subtitle = sprintf("solve time: %.2fs", + res_sor_v$diagnostics$elapsed_solve_s))) + +plot_annotation( + title = "Direct vs SOR (N=128)", + theme = theme(plot.title = element_text(face = "bold", size = 12)) +) +``` + +For `N = 128`, the two solvers agree to high precision. The direct solver is typically faster here. At `N = 512` or above, the iterative SOR becomes competitive because the LU factorisation becomes expensive. + +**SOR tuning:** + +- `sor_omega = 1.7` is a safe default. Values closer to 2 can accelerate convergence but may cause divergence. +- `sor_tol = 1e-6` is sufficient for visualisation. Tighten to `1e-8` for quantitative analysis. +- Increase `sor_max_iter` if you see a convergence warning. + +--- + +## 4. Solver selection guide + +```{r selection-table, echo=FALSE} +tbl <- data.frame( + Situation = c( + "N <= 400, standard use", + "Need exact Dirichlet or Neumann BCs", + "Holey mask (vessels, lumens)", + "N > 400 or large memory constraint", + "Fast parameter sweep over many L values", + "Quick visual sanity check" + ), + Recommendation = c( + 'method="fd", fd_solver="direct"', + 'method="fd"', + 'method="fd" (green/kde ignore interior holes)', + 'method="fd", fd_solver="iterative"', + 'method="green"', + 'method="kde"' + ) +) +knitr::kable(tbl, col.names = c("Situation", "Recommendation")) +``` + +--- + +## 5. Green's function advantage: rapid L sweeps + +Because `"green"` avoids assembling and factorising a sparse matrix, it is faster per call, especially at high resolution. This makes it ideal for exploring many diffusion lengths quickly: + +```{r green-sweep, fig.height=4, fig.width=9} +L_sweep <- c(0.5, 1.0, 2.0, 4.0, 8.0) + +sw_plots <- lapply(L_sweep, function(Lv) { + r <- estimate_concentration_field( + mask, tc, diffusion_length = Lv, D = 1, + method = "green", grid_resolution = 128L, verbose = FALSE + ) + df <- field_to_df(r) + rng <- range(df$field, na.rm = TRUE) + df$fn <- (df$field - rng[1]) / diff(rng) + ggplot(df, aes(x = x, y = y, fill = fn)) + + geom_raster(interpolate = TRUE) + + scale_fill_viridis_c(option = "plasma", name = "Norm.\nConc.", + limits = c(0, 1), na.value = "transparent") + + coord_equal() + theme_minimal(base_size = 8) + + labs(title = sprintf("L = %g", Lv), x = NULL, y = NULL) +}) + +wrap_plots(sw_plots, nrow = 1) + + plot_annotation( + title = "Green's function rapid sweep (method='green')", + subtitle = "No boundary conditions; fast; good for exploring L", + theme = theme(plot.title = element_text(face = "bold", size = 12), + plot.subtitle = element_text(size = 9, color = "grey40")) + ) +```