Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
3 changes: 3 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@
^LICENSE\.md$
^doc$
^Meta$
^_pkgdown\.yml$
^docs$
^pkgdown$
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ po/*~
rsconnect/
inst/doc
/Meta/
/doc/
# docs
12 changes: 7 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,19 @@ Package: PathwayEmbed
Title: Tools for Pathway-Level Embedding and Visualization in Single-Cell Data
Version: 0.0.0.9000
Authors@R:
person("Yaqing", "Huang", email = "yaqing.huang@yale.edu", role = c("aut", "cre"))
person("Yaqing", "Huang", email = "hyaqing1023@gmail.com", role = c("aut", "cre"))
Description: Provides tools for analyzing and visualizing pathway-level activity
in single-cell RNA-seq data. Includes functions for computing cell-wise pathway scores,
visualizing transduction states, calculating activation percentages,
and integrating pathway data with Seurat objects.
License: MIT + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
RoxygenNote: 7.3.3
Depends:
R (>= 3.5)
Imports:
readxl,
Seurat,
RColorBrewer,
ggplot2,
cowplot,
Expand All @@ -26,10 +25,13 @@ Imports:
effsize,
tidyverse,
purrr
Suggests:
knitr,
Suggests:
Seurat,
scales,
rmarkdown,
knitr,
testthat (>= 3.0.0)
Config/testthat/edition: 3
LazyData: true
VignetteBuilder: knitr
URL: https://raredonlab.github.io/PathwayEmbed
18 changes: 7 additions & 11 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,20 @@

export(CalculatePercentage)
export(ComputeCellData)
export(DataPreProcess)
export(ListPathway)
export(LoadPathway)
export(PathwayMaxMin)
export(PlotPathway)
export(PreparePlotData)
import(RColorBrewer)
import(Seurat)
import(cowplot)
import(ggplot2)
import(matrixStats)
import(readxl)
import(tidyverse)
import(viridis)
importFrom(dplyr,"%>%")
importFrom(dplyr,bind_rows)
importFrom(effsize,cohen.d)
importFrom(matrixStats,rowMaxs)
importFrom(matrixStats,rowMins)
importFrom(purrr,map)
importFrom(stats,cmdscale)
importFrom(stats,dist)
importFrom(readxl,excel_sheets)
importFrom(readxl,read_excel)
importFrom(stats,kruskal.test)
importFrom(stats,na.omit)
importFrom(stats,pairwise.wilcox.test)
importFrom(stats,wilcox.test)
108 changes: 74 additions & 34 deletions R/CalculatePercentage.R
Original file line number Diff line number Diff line change
@@ -1,63 +1,103 @@
#' CalculatePercentage
#'
#' This function calculates the percentage of cells in ON (scale > 0) and OFF (scale < 0)
#' activation states within each group defined by `group_var`. If exactly two groups
#' are provided, it also computes Cohen's d effect size between their activation values.
#' Calculates the percentage of cells in ON (\code{scale > 0}) and OFF
#' (\code{scale < 0}) activation states within each group defined by
#' \code{group_var}.
#'
#' If exactly two groups are provided, Cohen's d effect size and a Wilcoxon
#' rank-sum p-value are computed between the two groups.
#'
#' If more than two groups are provided, a Kruskal-Wallis p-value is computed
#' across all groups, and pairwise Wilcoxon p-values (Bonferroni-corrected) are
#' attached as an attribute.
#'
#' @param to.plot A data frame from \code{PreparePlotData()}, containing at
#' least a \code{scale} column and the grouping column specified by
#' \code{group_var}.
#' @param group_var A character string specifying the grouping column in
#' \code{to.plot} (e.g. \code{"genotype"}, \code{"treatment"}).
#'
#' @return A data frame with columns:
#' \describe{
#' \item{group}{Group label.}
#' \item{percentage_on}{Percentage of cells with \code{scale > 0}.}
#' \item{percentage_off}{Percentage of cells with \code{scale < 0}.}
#' \item{cohens_d}{(2-group only) Cohen's d effect size. Repeated for
#' both group rows as it is a single pairwise estimate.}
#' \item{p_value}{(2-group only) Wilcoxon rank-sum p-value.}
#' \item{kruskal_p}{(3+ groups only) Kruskal-Wallis p-value.}
#' }
#' For 3+ groups, Bonferroni-corrected pairwise Wilcoxon p-values are
#' attached via \code{attr(result, "pairwise_wilcox")}.
#'
#' @name CalculatePercentage
#' @importFrom dplyr bind_rows
#' @importFrom effsize cohen.d
#' @importFrom stats na.omit
#' @param to.plot A data frame containing at least a `scale` column and a grouping column.
#' @param group_var A string specifying the grouping variable (e.g., "genotype", "treatment").
#' @importFrom stats na.omit wilcox.test kruskal.test pairwise.wilcox.test
#'
#' @return A data frame with the percentage of ON/OFF cells and Cohen's d (if applicable).
#' @examples
#' \dontrun{
#' data(fake_to_plot)
#' CalculatePercentage(fake_to_plot, "genotype")
#' }
#'
#' @export
CalculatePercentage <- function(to.plot, group_var){
# Make sure there is scale data
stopifnot("scale" %in% names(to.plot))
CalculatePercentage <- function(to.plot, group_var) {

# Collect the distinct group labels, excluding NA
groups <- unique(na.omit(to.plot[[group_var]]))
if (!"scale" %in% names(to.plot)) {
stop("Column 'scale' not found in to.plot. Use PreparePlotData() first.")
}
if (!group_var %in% names(to.plot)) {
stop("Grouping column '", group_var, "' not found in to.plot.")
}

groups <- unique(na.omit(to.plot[[group_var]]))
results <- list()

# --- Per-group ON/OFF percentages ---
for (g in groups) {
subset_data <- to.plot[to.plot[[group_var]] == g, ]
subset_data <- to.plot[to.plot[[group_var]] == g & !is.na(to.plot[[group_var]]), ]
total <- nrow(subset_data)

# Calculate how many cells are in on/off status
on <- sum(subset_data[["scale"]] > 0, na.rm = TRUE)
if (total == 0) {
warning("Group '", g, "' has no cells after NA removal; skipping.")
next
}
on <- sum(subset_data[["scale"]] > 0, na.rm = TRUE)
off <- sum(subset_data[["scale"]] < 0, na.rm = TRUE)

# Calculate percentages of on/off cells
results[[as.character(g)]] <- list(
percentage_on = round(100 * on / total, 2),
percentage_on = round(100 * on / total, 2),
percentage_off = round(100 * off / total, 2)
)
}

# When exactly two groups are compared, Cohen's d (a measure of effect size) is computed between them for statistical comparison
# --- Statistics ---
if (length(groups) == 2) {
g1 <- groups[1]
g2 <- groups[2]
g1 <- groups[1]
g2 <- groups[2]
vec1 <- to.plot[to.plot[[group_var]] == g1, "scale"]
vec2 <- to.plot[to.plot[[group_var]] == g2, "scale"]

# Computes Cohen's d between two numeric vectors (vec1 and vec2) and extracts the estimated value of the effect size.
cohens_d_val <- cohen.d(vec1, vec2)$estimate
# |d value|: 0 - 0.2: negligible effect
# |d value|: 0.2 - 0.5: small effect
# |d value|: 0.5 - 0.8: medium effect
# |d value|: > 0.8: large effect
p_val <- wilcox.test(vec1, vec2)$p.value
for (g in groups) {
results[[as.character(g)]]$cohens_d <- cohens_d_val
results[[as.character(g)]]$p_value <- p_val
}

} else if (length(groups) > 2) {
kw_p <- kruskal.test(to.plot[["scale"]], to.plot[[group_var]])$p.value
pw <- pairwise.wilcox.test(
to.plot[["scale"]],
to.plot[[group_var]],
p.adjust.method = "bonferroni"
)
for (g in groups) {
results[[as.character(g)]]$kruskal_p <- kw_p
}

results[[as.character(g1)]]$cohens_d <- cohens_d_val
results[[as.character(g2)]]$cohens_d <- cohens_d_val
# Build output and attach pairwise results in same scope as pw
df <- bind_rows(results, .id = "group")
attr(df, "pairwise_wilcox") <- pw$p.value
return(df)
}

# Make a dataframe for the output
df <- bind_rows(results, .id = "group")
return(df)
bind_rows(results, .id = "group")
}
Loading
Loading