Vitek-Lab · swaraj-neu · Jun 10, 2026 · tonywu1999 · Jun 11, 2026 · tonywu1999
diff --git a/R/annotateProteinInfoFromIndra.R b/R/annotateProteinInfoFromIndra.R
diff --git a/R/cytoscapeNetwork.R b/R/cytoscapeNetwork.R
@@ -7,13 +7,14 @@
 #' overlaps are surfaced as hover tooltips.
 #'
 #' @param nodes       Data frame with at minimum an \code{id} column.  Optional
-#'                    columns: \code{logFC} (numeric), \code{hgncName}
-#'                    (character), \code{Site} (character, underscore-separated
-#'                    PTM site list).
+#'                    columns: \code{logFC} (numeric), \code{entityName}
+#'                    (character; may be semicolon-joined for multi-grounded
+#'                    rows), \code{entityId} (character), \code{Site}
+#'                    (character, underscore-separated PTM site list).
 #' @param edges       Data frame with columns \code{source}, \code{target},
 #'                    \code{interaction}.  Optional: \code{site},
 #'                    \code{evidenceLink}.
-#' @param displayLabelType \code{"id"} (default) or \code{"hgncName"} –
+#' @param displayLabelType \code{"id"} (default) or \code{"entityName"} –
 #'                    controls which column is used as the visible node label.
 #' @param nodeFontSize Font size (px) for node labels.  Default \code{12}.
 #' @param layoutOptions Named list of dagre layout options to override the

diff --git a/R/getSubnetworkFromIndra.R b/R/getSubnetworkFromIndra.R
@@ -3,10 +3,13 @@
 #' Using differential abundance results from MSstats, this function retrieves
 #' a subnetwork of protein interactions from INDRA database.
 #'
-#' @param input output of \code{\link[MSstats]{groupComparison}} function's 
-#' comparisionResult table, which contains a list of proteins and their 
-#' corresponding p-values, logFCs, along with additional HGNC ID and HGNC 
-#' name columns
+#' @param input output of \code{\link[MSstats]{groupComparison}} function's
+#' ComparisonResult table, annotated by
+#' \code{\link{annotateProteinInfoFromIndra}}. Must contain \code{Protein},
+#' \code{EntityNamespace}, and \code{EntityId} columns (and typically also
+#' \code{EntityName}, \code{log2FC}, \code{adj.pvalue}). When an analyte
+#' grounds to multiple candidates, the three \code{Entity*} columns are
+#' semicolon-joined and positionally aligned.
 #' @param protein_level_data output of the \code{\link[MSstats]{dataProcess}} 
 #' function's ProteinLevelData table, which contains a list of proteins and 
 #' their corresponding abundances.  Used for annotating correlation information 
@@ -72,7 +75,7 @@ getSubnetworkFromIndra <- function(input,
     direction = match.arg(direction)
     input <- .filterGetSubnetworkFromIndraInput(input, pvalueCutoff, logfc_cutoff, force_include_other, include_infinite_fc, direction)
     .validateGetSubnetworkFromIndraInput(input, protein_level_data, sources_filter, force_include_other)
-    res <- .callIndraCogexApi(input$HgncId, force_include_other)
+    res <- .callIndraCogexApi(input$EntityNamespace, input$EntityId, force_include_other)
     res <- .filterIndraResponse(res, statement_types, evidence_count_cutoff, sources_filter)
     edges <- .constructEdgesDataFrame(res, input, protein_level_data)
     edges <- .filterEdgesDataFrame(edges, paper_count_cutoff, correlation_cutoff)

diff --git a/R/utils_annotateProteinInfoFromIndra.R b/R/utils_annotateProteinInfoFromIndra.R
@@ -254,32 +254,43 @@ INDRA_API_URL = "https://discovery.indra.bio"
     return(res)
 }
 
-#' Call gilda API to get HGNC IDs from HGNC names
-#' @param hgncNames list of hgnc names
-#' @return named character vector mapping HGNC names to HGNC IDs
+#' Call Gilda API to ground entity text against any namespace
+#'
+#' Posts each input text to Gilda's `ground_multi` endpoint and returns
+#' every grounding candidate per input (in Gilda's ranking order). When
+#' `keep_only` is set, candidates whose `term$db` does not match are
+#' filtered out. The canonical entity name is taken from `term$entry_name`
+#' when non-empty, falling back to `term$text` (the matched term's text).
+#' @param textInputs list of character strings to ground
+#' @param keep_only optional character; if non-NULL, only candidates whose
+#'        `term$db == keep_only` are retained
+#' @return Named list keyed by input text. Each value is a list with
+#'         three equal-length character vectors: `ns`, `id`, `name`,
+#'         positionally aligned across Gilda's returned candidates.
+#'         Texts with no surviving grounding are omitted from the result.
 #' @importFrom jsonlite toJSON
 #' @importFrom httr POST add_headers content
 #' @keywords internal
 #' @noRd
-.callGetHgncIdsFromGildaApi <- function(hgncNames) {
-    
-    if (!is.list(hgncNames)) {
+.callGroundEntitiesFromGildaApi <- function(textInputs, keep_only = NULL) {
+
+    if (!is.list(textInputs)) {
         stop("Input must be a list.")
     }
-    
-    if (any(!sapply(hgncNames, is.character))) {
-        stop("All elements in the list must be character strings representing hgnc names.")
+
+    if (any(!sapply(textInputs, is.character))) {
+        stop("All elements in the list must be character strings.")
     }
-    
-    if (length(hgncNames) == 0) {
+
+    if (length(textInputs) == 0) {
         stop("Input list must not be empty.")
     }
-    
+
     apiUrl <- file.path("https://grounding.indra.bio/", "ground_multi")
-    
-    requestBody <- lapply(hgncNames, function(hgnc_name) {
+
+    requestBody <- lapply(textInputs, function(text_input) {
         list(
-            text = hgnc_name,
+            text = text_input,
             organisms = list("9606")
         )
     })
@@ -296,27 +307,45 @@ INDRA_API_URL = "https://discovery.indra.bio"
         message("Error in API call: ", e)
         NULL
     })
-    
+
     if (is.null(res)) {
         return(NULL)
     }
-
-    hgnc_mapping <- character(0)
-
-    for (item in res) {
-        # Find the term where db == "HGNC"
-        hgnc_term <- NULL
+
+    grounding_map <- list()
+
+    for (i in seq_along(res)) {
+        item       <- res[[i]]
+        input_text <- as.character(textInputs[[i]])
+
+        ns_vec   <- character(0)
+        id_vec   <- character(0)
+        name_vec <- character(0)
+
         for (entry in item) {
-            if (!is.null(entry$term$db) && entry$term$db == "HGNC") {
-                hgnc_term <- entry$term
-                break
+            term <- entry$term
+            if (is.null(term) || is.null(term$db) || is.null(term$id)) next
+            if (!is.null(keep_only) && term$db != keep_only) next
+
+            entry_name <- if (!is.null(term$entry_name) && nzchar(term$entry_name)) {
+                term$entry_name
+            } else {
+                term$text
             }
+
+            ns_vec   <- c(ns_vec,   term$db)
+            id_vec   <- c(id_vec,   term$id)
+            name_vec <- c(name_vec, entry_name)
         }
-
-        # Only add to mapping if HGNC term was found
-        if (!is.null(hgnc_term)) {
-            hgnc_mapping[hgnc_term$text] <- hgnc_term$id
+
+        if (length(ns_vec) > 0) {
+            grounding_map[[input_text]] <- list(
+                ns   = ns_vec,
+                id   = id_vec,
+                name = name_vec
+            )
         }
     }
-    return(hgnc_mapping)
+
+    return(grounding_map)
 }
diff --git a/R/utils_cytoscapeNetwork.R b/R/utils_cytoscapeNetwork.R
@@ -238,35 +238,39 @@
         rep("#D3D3D3", nrow(nodes))
     }
 
-    label_col <- if (display_label_type == "hgncName" &&
-                     "hgncName" %in% names(nodes)) "hgncName" else "id"
-    
+    label_col <- if (display_label_type == "entityName" &&
+                     "entityName" %in% names(nodes)) "entityName" else "id"
+
     has_ptm_sites <- if ("Site" %in% names(nodes)) {
         unique(nodes$id[!is.na(nodes$Site) & trimws(nodes$Site) != ""])
     } else {
         character(0)
     }
-    
+
     elements        <- list()
     emitted_prots   <- character(0)
+    # `emitted_cpds` and `node_type = "compound"` below refer to Cytoscape
+    # grouping containers used to parent PTM satellite nodes around a protein.
+    # This Cytoscape "compound" concept is UNRELATED to the chemical
+    # `proteinIdType = "Compound"` analyte type in annotateProteinInfoFromIndra.
     emitted_cpds    <- character(0)
     emitted_ptm_n   <- character(0)
     emitted_ptm_e   <- character(0)
-    
+
     for (i in seq_len(nrow(nodes))) {
         row       <- nodes[i, , drop = FALSE]
         color     <- node_colors[i]
         has_site  <- "Site" %in% names(nodes) &&
             !is.na(row$Site) && trimws(row$Site) != ""
-        
-        display_label <- if (label_col == "hgncName" &&
-                             !is.na(row$hgncName) && row$hgncName != "")
-            row$hgncName else row$id
-        
+
+        display_label <- if (label_col == "entityName" &&
+                             !is.na(row$entityName) && row$entityName != "")
+            row$entityName else row$id
+
         needs_compound <- row$id %in% has_ptm_sites
         compound_id    <- paste0(row$id, "__compound__")
-        
-        # Compound container
+
+        # Cytoscape compound container (PTM grouping parent — not a chemical compound)
         if (needs_compound && !(compound_id %in% emitted_cpds)) {
             elements <- c(elements, list(
                 list(data = list(id        = compound_id,