RaredonLab · RaredonLab · Apr 8, 2026 · Aug 9, 2025 · Aug 25, 2025 · Sep 26, 2025
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -3,3 +3,6 @@
 ^LICENSE\.md$
 ^doc$
 ^Meta$
+^_pkgdown\.yml$
+^docs$
+^pkgdown$
diff --git a/.gitignore b/.gitignore
@@ -49,3 +49,5 @@ po/*~
 rsconnect/
 inst/doc
 /Meta/
+/doc/
+# docs
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,20 +2,19 @@ Package: PathwayEmbed
 Title: Tools for Pathway-Level Embedding and Visualization in Single-Cell Data
 Version: 0.0.0.9000
 Authors@R: 
-    person("Yaqing", "Huang", email = "yaqing.huang@yale.edu", role = c("aut", "cre"))
+    person("Yaqing", "Huang", email = "hyaqing1023@gmail.com", role = c("aut", "cre"))
 Description: Provides tools for analyzing and visualizing pathway-level activity 
     in single-cell RNA-seq data. Includes functions for computing cell-wise pathway scores, 
     visualizing transduction states, calculating activation percentages, 
     and integrating pathway data with Seurat objects.
 License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
 Depends: 
     R (>= 3.5)
 Imports:
     readxl,
-    Seurat,
     RColorBrewer,
     ggplot2,
     cowplot,
@@ -26,10 +25,13 @@ Imports:
     effsize,
     tidyverse,
     purrr
-Suggests: 
-    knitr,
+Suggests:
+    Seurat,
+    scales,
     rmarkdown,
+    knitr,
     testthat (>= 3.0.0)
 Config/testthat/edition: 3
 LazyData: true
 VignetteBuilder: knitr
+URL: https://raredonlab.github.io/PathwayEmbed
diff --git a/NAMESPACE b/NAMESPACE
@@ -2,24 +2,20 @@
 
 export(CalculatePercentage)
 export(ComputeCellData)
+export(DataPreProcess)
+export(ListPathway)
 export(LoadPathway)
 export(PathwayMaxMin)
 export(PlotPathway)
 export(PreparePlotData)
-import(RColorBrewer)
-import(Seurat)
-import(cowplot)
 import(ggplot2)
-import(matrixStats)
-import(readxl)
-import(tidyverse)
-import(viridis)
-importFrom(dplyr,"%>%")
 importFrom(dplyr,bind_rows)
 importFrom(effsize,cohen.d)
 importFrom(matrixStats,rowMaxs)
 importFrom(matrixStats,rowMins)
-importFrom(purrr,map)
-importFrom(stats,cmdscale)
-importFrom(stats,dist)
+importFrom(readxl,excel_sheets)
+importFrom(readxl,read_excel)
+importFrom(stats,kruskal.test)
 importFrom(stats,na.omit)
+importFrom(stats,pairwise.wilcox.test)
+importFrom(stats,wilcox.test)
diff --git a/R/CalculatePercentage.R b/R/CalculatePercentage.R
@@ -1,63 +1,103 @@
 #' CalculatePercentage
 #'
-#' This function calculates the percentage of cells in ON (scale > 0) and OFF (scale < 0)
-#' activation states within each group defined by `group_var`. If exactly two groups
-#' are provided, it also computes Cohen's d effect size between their activation values.
+#' Calculates the percentage of cells in ON (\code{scale > 0}) and OFF
+#' (\code{scale < 0}) activation states within each group defined by
+#' \code{group_var}.
+#'
+#' If exactly two groups are provided, Cohen's d effect size and a Wilcoxon
+#' rank-sum p-value are computed between the two groups.
+#'
+#' If more than two groups are provided, a Kruskal-Wallis p-value is computed
+#' across all groups, and pairwise Wilcoxon p-values (Bonferroni-corrected) are
+#' attached as an attribute.
+#'
+#' @param to.plot A data frame from \code{PreparePlotData()}, containing at
+#'   least a \code{scale} column and the grouping column specified by
+#'   \code{group_var}.
+#' @param group_var A character string specifying the grouping column in
+#'   \code{to.plot} (e.g. \code{"genotype"}, \code{"treatment"}).
+#'
+#' @return A data frame with columns:
+#' \describe{
+#'   \item{group}{Group label.}
+#'   \item{percentage_on}{Percentage of cells with \code{scale > 0}.}
+#'   \item{percentage_off}{Percentage of cells with \code{scale < 0}.}
+#'   \item{cohens_d}{(2-group only) Cohen's d effect size. Repeated for
+#'     both group rows as it is a single pairwise estimate.}
+#'   \item{p_value}{(2-group only) Wilcoxon rank-sum p-value (same on both rows).}
+#'   \item{kruskal_p}{(3+ groups only) Kruskal-Wallis p-value.}
+#' }
+#' For 3+ groups, Bonferroni-corrected pairwise Wilcoxon p-values are
+#' attached via \code{attr(result, "pairwise_wilcox")}.
 #'
-#' @name CalculatePercentage
 #' @importFrom dplyr bind_rows
 #' @importFrom effsize cohen.d
-#' @importFrom stats na.omit
-#' @param to.plot A data frame containing at least a `scale` column and a grouping column.
-#' @param group_var A string specifying the grouping variable (e.g., "genotype", "treatment").
+#' @importFrom stats na.omit wilcox.test kruskal.test pairwise.wilcox.test
 #'
-#' @return A data frame with the percentage of ON/OFF cells and Cohen's d (if applicable).
 #' @examples
+#' \dontrun{
 #' data(fake_to_plot)
 #' CalculatePercentage(fake_to_plot, "genotype")
+#' }
+#'
 #' @export
-CalculatePercentage <- function(to.plot, group_var){
-  # Make sure there is scale data
-  stopifnot("scale" %in% names(to.plot))
+CalculatePercentage <- function(to.plot, group_var) {
 
-  # Make sure no NA
-  groups <- unique(na.omit(to.plot[[group_var]]))
+  if (!"scale" %in% names(to.plot)) {
+    stop("Column 'scale' not found in to.plot. Use PreparePlotData() first.")
+  }
+  if (!group_var %in% names(to.plot)) {
+    stop("Grouping column '", group_var, "' not found in to.plot.")
+  }
+
+  groups  <- unique(na.omit(to.plot[[group_var]]))
   results <- list()
 
+  # --- Per-group ON/OFF percentages ---
   for (g in groups) {
-    subset_data <- to.plot[to.plot[[group_var]] == g, ]
+    subset_data <- to.plot[to.plot[[group_var]] == g & !is.na(to.plot[[group_var]]), ]
     total <- nrow(subset_data)
-
-    # Calculate how many cells are in on/off status
-    on <- sum(subset_data[["scale"]] > 0, na.rm = TRUE)
+    if (total == 0) {
+      warning("Group '", g, "' has no cells after NA removal; skipping.")
+      next
+    }
+    on  <- sum(subset_data[["scale"]] > 0, na.rm = TRUE)
     off <- sum(subset_data[["scale"]] < 0, na.rm = TRUE)
-
-    # Calculate percentages of on/off cells
     results[[as.character(g)]] <- list(
-      percentage_on = round(100 * on / total, 2),
+      percentage_on  = round(100 * on  / total, 2),
       percentage_off = round(100 * off / total, 2)
     )
   }
 
-  # When there are two groups in comparison, Cohen's d — a measure of effect size — will be applied for statistic purpose
+  # --- Statistics: which() skips NA labels; [["scale"]] is tibble-safe ---
   if (length(groups) == 2) {
-    g1 <- groups[1]
-    g2 <- groups[2]
+    g1   <- groups[1]
+    g2   <- groups[2]
-    vec1 <- to.plot[to.plot[[group_var]] == g1, "scale"]
-    vec2 <- to.plot[to.plot[[group_var]] == g2, "scale"]
+    vec1 <- to.plot[["scale"]][which(to.plot[[group_var]] == g1)]
+    vec2 <- to.plot[["scale"]][which(to.plot[[group_var]] == g2)]
-
-    # Computes Cohen's d between two numeric vectors (vec1 and vec2) and extracts the estimated value of the effect size.
-    cohens_d_val <- cohen.d(vec1, vec2)$estimate
+    cohens_d_val <- cohen.d(vec1, vec2, na.rm = TRUE)$estimate
-    # |d value|: 0 - 0.2, effect size is negligible
-    # |d value|: 0.2 - 0.5: small effect
-    # |d value|: 0.5 - 0.8: medium effect
-    # |d value|: > 0.8: large effect
+    p_val        <- wilcox.test(vec1, vec2)$p.value
+    for (g in groups) {
+      results[[as.character(g)]]$cohens_d <- cohens_d_val
+      results[[as.character(g)]]$p_value  <- p_val
+    }
+
+  } else if (length(groups) > 2) {
+    kw_p <- kruskal.test(to.plot[["scale"]], to.plot[[group_var]])$p.value
+    pw   <- pairwise.wilcox.test(
+      to.plot[["scale"]],
+      to.plot[[group_var]],
+      p.adjust.method = "bonferroni"
+    )
+    for (g in groups) {
+      results[[as.character(g)]]$kruskal_p <- kw_p
+    }
 
-    results[[as.character(g1)]]$cohens_d <- cohens_d_val
-    results[[as.character(g2)]]$cohens_d <- cohens_d_val
+    # Build output and attach pairwise results in same scope as pw
+    df <- bind_rows(results, .id = "group")
+    attr(df, "pairwise_wilcox") <- pw$p.value
+    return(df)
   }
 
-  # Make a dataframe for the output
-  df <- bind_rows(results, .id = "group")
-  return(df)
+  bind_rows(results, .id = "group")
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -49,3 +49,5 @@ po/*~ @@
     rsconnect/
     inst/doc
     /Meta/
+    /doc/
+    # docs