From 24fff4a76bedb5cec4eb7ff8dfbaf0e450ed8dc8 Mon Sep 17 00:00:00 2001 From: Swaraj Patil Date: Wed, 10 Jun 2026 00:55:00 -0400 Subject: [PATCH] Add Compound proteinIdType and entity-agnostic grounding columns: Generalize the protein-only HgncId/HgncName contract into EntityNamespace/EntityId/EntityName grounded through Gilda, keeping multi-grounding as semicolon-joined aligned lists that fan out into the INDRA query. Gene-only annotations are skipped for compounds, and the new contract flows through annotateProteinInfoFromIndra, getSubnetworkFromIndra, and cytoscapeNetwork. --- R/annotateProteinInfoFromIndra.R | 209 +++++++++------- R/cytoscapeNetwork.R | 9 +- R/getSubnetworkFromIndra.R | 13 +- R/utils_annotateProteinInfoFromIndra.R | 89 ++++--- R/utils_cytoscapeNetwork.R | 28 ++- R/utils_getSubnetworkFromIndra.R | 151 +++++++++--- inst/extdata/groupComparisonModel.csv | 22 +- .../extdata/groupComparisonModel_compound.csv | 49 ++++ man/annotateProteinInfoFromIndra.Rd | 46 ++-- man/cytoscapeNetwork.Rd | 9 +- man/dot-populateEntityIdsInDataFrame.Rd | 21 ++ man/dot-populateEntityNamesInDataFrame.Rd | 21 ++ man/dot-populateHgncIdsInDataFrame.Rd | 20 -- man/dot-populateHgncNamesInDataFrame.Rd | 17 -- man/dot-populateKinaseInfoInDataFrame.Rd | 7 +- man/dot-populatePhophataseInfoInDataFrame.Rd | 7 +- ...ulateTranscriptionFactorInfoInDataFrame.Rd | 7 +- man/dot-populateUniprotIdsInDataFrame.Rd | 5 +- ...lidateAnnotateProteinInfoFromIndraInput.Rd | 6 +- man/exportNetworkToHTML.Rd | 9 +- man/getSubnetworkFromIndra.Rd | 11 +- man/previewNetworkInBrowser.Rd | 9 +- .../test-annotateProteinInfoFromIndra.R | 85 ++++--- tests/testthat/test-exportNetworkToHTML.R | 30 +-- tests/testthat/test-getSubnetworkFromIndra.R | 13 +- tests/testthat/test-multi-grounding.R | 226 ++++++++++++++++++ ...est-utils_annotateProteinInfoFromIndra.R.R | 38 ++- tests/testthat/test-utils_cytoscapeNetwork.R | 14 +- .../test-utils_getSubnetworkFromIndra.R | 7 +- vignettes/Cytoscape-Visualization.Rmd | 16 
+- vignettes/MSstatsBioNet.Rmd | 2 +- vignettes/PTM-Analysis.Rmd | 2 +- 32 files changed, 863 insertions(+), 335 deletions(-) create mode 100644 inst/extdata/groupComparisonModel_compound.csv create mode 100644 man/dot-populateEntityIdsInDataFrame.Rd create mode 100644 man/dot-populateEntityNamesInDataFrame.Rd delete mode 100644 man/dot-populateHgncIdsInDataFrame.Rd delete mode 100644 man/dot-populateHgncNamesInDataFrame.Rd create mode 100644 tests/testthat/test-multi-grounding.R diff --git a/R/annotateProteinInfoFromIndra.R b/R/annotateProteinInfoFromIndra.R index b85497e..dd3b91d 100644 --- a/R/annotateProteinInfoFromIndra.R +++ b/R/annotateProteinInfoFromIndra.R @@ -1,22 +1,40 @@ #' Annotate Protein Information from Indra #' -#' This function annotates a data frame with protein information from Indra. -#' -#' @param df output of \code{\link[MSstats]{groupComparison}} function's -#' comparisonResult table, which contains a list of proteins and their -#' corresponding p-values, logFCs, along with additional HGNC ID and HGNC -#' name columns -#' @param proteinIdType A character string specifying the type of protein ID. -#' It can be either "Uniprot", "Uniprot_Mnemonic", or "Hgnc_Name". +#' This function annotates a data frame with entity (protein or compound) +#' grounding information from INDRA / Gilda, plus gene-only flags +#' (transcription factor / kinase / phosphatase) for the protein paths. +#' +#' @param df output of \code{\link[MSstats]{groupComparison}} function's +#' comparisonResult table. Must contain a \code{Protein} column whose +#' values are interpreted according to \code{proteinIdType}. +#' @param proteinIdType A character string specifying the type of analyte +#' identifier in the \code{Protein} column. One of +#' \code{"Uniprot"}, \code{"Uniprot_Mnemonic"}, \code{"Hgnc_Name"}, or +#' \code{"Compound"}. 
The \code{"Compound"} value treats inputs as +#' metabolite names and grounds them through Gilda, keeping whatever +#' namespace Gilda returns (CHEBI / PUBCHEM / CHEMBL / ...). #' @return A data frame with the following columns: #' \describe{ -#' \item{Protein}{Character. The original protein identifier.} -#' \item{UniprotID}{Character. The Uniprot ID of the protein.} -#' \item{HgncID}{Character. The HGNC ID of the protein.} -#' \item{HgncName}{Character. The HGNC name of the protein.} -#' \item{IsTranscriptionFactor}{Logical. Indicates if the protein is a transcription factor.} -#' \item{IsKinase}{Logical. Indicates if the protein is a kinase.} -#' \item{IsPhosphatase}{Logical. Indicates if the protein is a phosphatase.} +#' \item{Protein}{Character. The original identifier from the input.} +#' \item{GlobalProtein}{Character. The input identifier with the +#' MSstats mnemonic suffix stripped, used as the grounding key.} +#' \item{UniprotId}{Character. The Uniprot ID of the protein, or +#' \code{NA} for \code{"Hgnc_Name"} and \code{"Compound"} inputs.} +#' \item{EntityNamespace}{Character. The grounding namespace +#' (e.g. \code{"HGNC"}, \code{"CHEBI"}). When a single input grounds +#' to multiple candidates, namespaces are semicolon-joined and +#' positionally aligned with \code{EntityId} and \code{EntityName}.} +#' \item{EntityId}{Character. The bare grounding identifier within its +#' namespace (e.g. \code{"1097"} for HGNC, \code{"28748"} for +#' CHEBI). Semicolon-joined when multi-grounded.} +#' \item{EntityName}{Character. The canonical display name from the +#' grounding source. Semicolon-joined when multi-grounded.} +#' \item{IsTranscriptionFactor}{Logical. \code{NA} for +#' \code{proteinIdType == "Compound"}.} +#' \item{IsKinase}{Logical. \code{NA} for +#' \code{proteinIdType == "Compound"}.} +#' \item{IsPhosphatase}{Logical. 
\code{NA} for +#' \code{proteinIdType == "Compound"}.} #' } #' @examples #' df <- data.frame(Protein = c("CLH1_HUMAN")) @@ -24,35 +42,36 @@ #' head(annotated_df) #' @export annotateProteinInfoFromIndra <- function(df, proteinIdType) { - .validateAnnotateProteinInfoFromIndraInput(df) + .validateAnnotateProteinInfoFromIndraInput(df, proteinIdType) df <- .populateUniprotIdsInDataFrame(df, proteinIdType) - df <- .populateHgncIdsInDataFrame(df, proteinIdType) - df <- .populateHgncNamesInDataFrame(df) - df <- .populateTranscriptionFactorInfoInDataFrame(df) - df <- .populateKinaseInfoInDataFrame(df) - df <- .populatePhophataseInfoInDataFrame(df) + df <- .populateEntityIdsInDataFrame(df, proteinIdType) + df <- .populateEntityNamesInDataFrame(df) + df <- .populateTranscriptionFactorInfoInDataFrame(df, proteinIdType) + df <- .populateKinaseInfoInDataFrame(df, proteinIdType) + df <- .populatePhophataseInfoInDataFrame(df, proteinIdType) return(df) } #' Validate Annotate Protein Info Input #' -#' This function validates the input data frame for the annotateProteinInfoFromIndra function. -#' #' @param df A data frame containing protein information. +#' @param proteinIdType The proteinIdType supplied by the caller. #' @return None. Throws an error if validation fails. -.validateAnnotateProteinInfoFromIndraInput <- function(df) { +.validateAnnotateProteinInfoFromIndraInput <- function(df, proteinIdType) { if (!"Protein" %in% colnames(df)) { stop("Input dataframe must contain 'Protein' column.") } + allowed <- c("Uniprot", "Uniprot_Mnemonic", "Hgnc_Name", "Compound") + if (length(proteinIdType) != 1 || !proteinIdType %in% allowed) { + stop("Invalid proteinIdType '", proteinIdType, "'. ", + "Must be one of: ", paste(allowed, collapse = ", "), ".") + } } #' Populate Uniprot IDs in Data Frame #' -#' This function populates the Uniprot IDs in the data frame based on the protein ID type. -#' #' @param df A data frame containing protein information. 
-#' @param proteinIdType A character string specifying the type of protein ID. -#' It can be either "Uniprot" or "Uniprot_Mnemonic". +#' @param proteinIdType A character string specifying the type of protein ID. #' @return A data frame with populated Uniprot IDs. .populateUniprotIdsInDataFrame <- function(df, proteinIdType) { if ("GlobalProtein" %in% colnames(df)) { @@ -68,7 +87,7 @@ annotateProteinInfoFromIndra <- function(df, proteinIdType) { if (proteinIdType == "Uniprot") { df$UniprotId <- as.character(df$GlobalProtein) } - + if (proteinIdType == "Uniprot_Mnemonic") { mnemonicProteins <- protein_ids if (length(mnemonicProteins) > 0) { @@ -80,23 +99,26 @@ annotateProteinInfoFromIndra <- function(df, proteinIdType) { } } } - - if (proteinIdType == "Hgnc_Name") { + + if (proteinIdType == "Hgnc_Name" || proteinIdType == "Compound") { df$UniprotId <- NA } return(df) } -#' Populate HGNC IDs in Data Frame +#' Populate Entity IDs and namespaces in Data Frame #' -#' This function populates the HGNC IDs in the data frame based on the Uniprot IDs. +#' Sets \code{EntityNamespace} and \code{EntityId}. For Gilda-sourced rows +#' (\code{"Hgnc_Name"}, \code{"Compound"}) also sets \code{EntityName} from +#' the same response, avoiding a second name lookup. #' #' @param df A data frame containing protein information. -#' @param proteinIdType A character string specifying the type of protein ID. -#' It can be either "Uniprot", "Uniprot_Mnemonic", or "Hgnc_Name". -#' @return A data frame with populated HGNC IDs. -.populateHgncIdsInDataFrame <- function(df, proteinIdType) { - df$HgncId <- NA +#' @param proteinIdType A character string specifying the type of protein ID. +#' @return A data frame with populated entity grounding columns. 
+.populateEntityIdsInDataFrame <- function(df, proteinIdType) { + df$EntityNamespace <- NA + df$EntityId <- NA + df$EntityName <- NA if (proteinIdType == "Uniprot" || proteinIdType == "Uniprot_Mnemonic") { validMask <- !is.na(df$UniprotId) validUniprots <- unique(df$UniprotId[validMask]) @@ -104,40 +126,52 @@ annotateProteinInfoFromIndra <- function(df, proteinIdType) { hgncMapping <- .callGetHgncIdsFromUniprotIdsApi(as.list(validUniprots)) for (uniprotId in names(hgncMapping)) { if (!is.null(hgncMapping[[uniprotId]])) { - df$HgncId[df$UniprotId == uniprotId] <- hgncMapping[[uniprotId]] + df$EntityNamespace[df$UniprotId == uniprotId] <- "HGNC" + df$EntityId[df$UniprotId == uniprotId] <- hgncMapping[[uniprotId]] } } } } else { - hgncNames <- unique(df$GlobalProtein) - if (length(hgncNames) > 0) { - hgncMapping <- .callGetHgncIdsFromGildaApi(as.list(hgncNames)) - for (hgncName in names(hgncMapping)) { - if (!is.null(hgncMapping[[hgncName]])) { - df$HgncId[df$GlobalProtein == hgncName] <- hgncMapping[[hgncName]] + keep_only <- if (proteinIdType == "Hgnc_Name") "HGNC" else NULL + textInputs <- unique(df$GlobalProtein) + if (length(textInputs) > 0) { + grounding_map <- .callGroundEntitiesFromGildaApi( + as.list(textInputs), keep_only = keep_only) + if (!is.null(grounding_map)) { + for (input_text in names(grounding_map)) { + g <- grounding_map[[input_text]] + stopifnot(length(g$ns) == length(g$id), + length(g$ns) == length(g$name)) + row_mask <- df$GlobalProtein == input_text + df$EntityNamespace[row_mask] <- paste(g$ns, collapse = ";") + df$EntityId[row_mask] <- paste(g$id, collapse = ";") + df$EntityName[row_mask] <- paste(g$name, collapse = ";") } } } } - return(df) } -#' Populate HGNC Names in Data Frame +#' Populate Entity Names in Data Frame #' -#' This function populates the HGNC names in the data frame based on the HGNC IDs. +#' Fills \code{EntityName} for rows whose name was not set by the IDs step. 
+#' In practice this covers the UniProt / Uniprot_Mnemonic paths, where +#' \code{EntityId} is a single bare HGNC id; the HGNC names API is queried. +#' Gilda-sourced rows already have \code{EntityName} populated and are +#' skipped. #' #' @param df A data frame containing protein information. -#' @return A data frame with populated HGNC names. -.populateHgncNamesInDataFrame <- function(df) { - df$HgncName <- NA - validHgncMask <- !is.na(df$HgncId) - validHgncs <- unique(df$HgncId[validHgncMask]) - if (length(validHgncs) > 0) { - nameMapping <- .callGetHgncNamesFromHgncIdsApi(as.list(validHgncs)) - for (hgncId in names(nameMapping)) { - if (!is.null(nameMapping[[hgncId]])) { - df$HgncName[df$HgncId == hgncId] <- nameMapping[[hgncId]] +#' @return A data frame with populated entity names. +.populateEntityNamesInDataFrame <- function(df) { + needsLookup <- is.na(df$EntityName) & !is.na(df$EntityId) + if (any(needsLookup)) { + validIds <- unique(df$EntityId[needsLookup]) + nameMapping <- .callGetHgncNamesFromHgncIdsApi(as.list(validIds)) + for (entityId in names(nameMapping)) { + if (!is.null(nameMapping[[entityId]])) { + row_mask <- needsLookup & df$EntityId == entityId + df$EntityName[row_mask] <- nameMapping[[entityId]] } } } @@ -146,20 +180,23 @@ annotateProteinInfoFromIndra <- function(df, proteinIdType) { #' Populate Transcription Factor Info in Data Frame #' -#' This function populates the transcription factor information in the data frame based on the HGNC names. -#' #' @param df A data frame containing protein information. +#' @param proteinIdType The proteinIdType supplied by the caller. Gene-only +#' flags are \code{NA} (no API call) when this is \code{"Compound"}. #' @return A data frame with populated transcription factor information. 
-.populateTranscriptionFactorInfoInDataFrame <- function(df) { +.populateTranscriptionFactorInfoInDataFrame <- function(df, proteinIdType) { df$IsTranscriptionFactor <- NA - validNameMask <- !is.na(df$HgncName) - validNames <- unique(df$HgncName[validNameMask]) + if (proteinIdType == "Compound") { + return(df) + } + validNameMask <- !is.na(df$EntityName) + validNames <- unique(df$EntityName[validNameMask]) if (length(validNames) > 0) { validNamesList <- as.list(validNames) charMapping <- .callIsTranscriptionFactorApi(validNamesList) - for (hgncName in names(charMapping)) { - if (!is.null(charMapping[[hgncName]])) { - df$IsTranscriptionFactor[df$HgncName == hgncName] <- charMapping[[hgncName]] + for (entityName in names(charMapping)) { + if (!is.null(charMapping[[entityName]])) { + df$IsTranscriptionFactor[df$EntityName == entityName] <- charMapping[[entityName]] } } } @@ -168,20 +205,23 @@ annotateProteinInfoFromIndra <- function(df, proteinIdType) { #' Populate Kinase Info in Data Frame #' -#' This function populates the kinase information in the data frame based on the HGNC names. -#' #' @param df A data frame containing protein information. +#' @param proteinIdType The proteinIdType supplied by the caller. Gene-only +#' flags are \code{NA} (no API call) when this is \code{"Compound"}. #' @return A data frame with populated kinase information. 
-.populateKinaseInfoInDataFrame <- function(df) { +.populateKinaseInfoInDataFrame <- function(df, proteinIdType) { df$IsKinase <- NA - validNameMask <- !is.na(df$HgncName) - validNames <- unique(df$HgncName[validNameMask]) + if (proteinIdType == "Compound") { + return(df) + } + validNameMask <- !is.na(df$EntityName) + validNames <- unique(df$EntityName[validNameMask]) if (length(validNames) > 0) { validNamesList <- as.list(validNames) charMapping <- .callIsKinaseApi(validNamesList) - for (hgncName in names(charMapping)) { - if (!is.null(charMapping[[hgncName]])) { - df$IsKinase[df$HgncName == hgncName] <- charMapping[[hgncName]] + for (entityName in names(charMapping)) { + if (!is.null(charMapping[[entityName]])) { + df$IsKinase[df$EntityName == entityName] <- charMapping[[entityName]] } } } @@ -190,22 +230,25 @@ annotateProteinInfoFromIndra <- function(df, proteinIdType) { #' Populate Phosphatase Info in Data Frame #' -#' This function populates the phosphatase information in the data frame based on the HGNC names. -#' #' @param df A data frame containing protein information. +#' @param proteinIdType The proteinIdType supplied by the caller. Gene-only +#' flags are \code{NA} (no API call) when this is \code{"Compound"}. #' @return A data frame with populated phosphatase information. 
-.populatePhophataseInfoInDataFrame <- function(df) { +.populatePhophataseInfoInDataFrame <- function(df, proteinIdType) { df$IsPhosphatase <- NA - validNameMask <- !is.na(df$HgncName) - validNames <- unique(df$HgncName[validNameMask]) + if (proteinIdType == "Compound") { + return(df) + } + validNameMask <- !is.na(df$EntityName) + validNames <- unique(df$EntityName[validNameMask]) if (length(validNames) > 0) { validNamesList <- as.list(validNames) charMapping <- .callIsPhosphataseApi(validNamesList) - for (hgncName in names(charMapping)) { - if (!is.null(charMapping[[hgncName]])) { - df$IsPhosphatase[df$HgncName == hgncName] <- charMapping[[hgncName]] + for (entityName in names(charMapping)) { + if (!is.null(charMapping[[entityName]])) { + df$IsPhosphatase[df$EntityName == entityName] <- charMapping[[entityName]] } } } return(df) -} \ No newline at end of file +} diff --git a/R/cytoscapeNetwork.R b/R/cytoscapeNetwork.R index 9a0a86a..e46fa7d 100644 --- a/R/cytoscapeNetwork.R +++ b/R/cytoscapeNetwork.R @@ -7,13 +7,14 @@ #' overlaps are surfaced as hover tooltips. #' #' @param nodes Data frame with at minimum an \code{id} column. Optional -#' columns: \code{logFC} (numeric), \code{hgncName} -#' (character), \code{Site} (character, underscore-separated -#' PTM site list). +#' columns: \code{logFC} (numeric), \code{entityName} +#' (character; may be semicolon-joined for multi-grounded +#' rows), \code{entityId} (character), \code{Site} +#' (character, underscore-separated PTM site list). #' @param edges Data frame with columns \code{source}, \code{target}, #' \code{interaction}. Optional: \code{site}, #' \code{evidenceLink}. -#' @param displayLabelType \code{"id"} (default) or \code{"hgncName"} – +#' @param displayLabelType \code{"id"} (default) or \code{"entityName"} – #' controls which column is used as the visible node label. #' @param nodeFontSize Font size (px) for node labels. Default \code{12}. 
#' @param layoutOptions Named list of dagre layout options to override the diff --git a/R/getSubnetworkFromIndra.R b/R/getSubnetworkFromIndra.R index 8eea0a9..ec67bb6 100644 --- a/R/getSubnetworkFromIndra.R +++ b/R/getSubnetworkFromIndra.R @@ -3,10 +3,13 @@ #' Using differential abundance results from MSstats, this function retrieves #' a subnetwork of protein interactions from INDRA database. #' -#' @param input output of \code{\link[MSstats]{groupComparison}} function's -#' comparisionResult table, which contains a list of proteins and their -#' corresponding p-values, logFCs, along with additional HGNC ID and HGNC -#' name columns +#' @param input output of \code{\link[MSstats]{groupComparison}} function's +#' comparisonResult table, annotated by +#' \code{\link{annotateProteinInfoFromIndra}}. Must contain \code{Protein}, +#' \code{EntityNamespace}, and \code{EntityId} columns (and typically also +#' \code{EntityName}, \code{log2FC}, \code{adj.pvalue}). When an analyte +#' grounds to multiple candidates the three \code{Entity*} columns are +#' semicolon-joined and positionally aligned. #' @param protein_level_data output of the \code{\link[MSstats]{dataProcess}} #' function's ProteinLevelData table, which contains a list of proteins and #' their corresponding abundances. 
Used for annotating correlation information @@ -72,7 +75,7 @@ getSubnetworkFromIndra <- function(input, direction = match.arg(direction) input <- .filterGetSubnetworkFromIndraInput(input, pvalueCutoff, logfc_cutoff, force_include_other, include_infinite_fc, direction) .validateGetSubnetworkFromIndraInput(input, protein_level_data, sources_filter, force_include_other) - res <- .callIndraCogexApi(input$HgncId, force_include_other) + res <- .callIndraCogexApi(input$EntityNamespace, input$EntityId, force_include_other) res <- .filterIndraResponse(res, statement_types, evidence_count_cutoff, sources_filter) edges <- .constructEdgesDataFrame(res, input, protein_level_data) edges <- .filterEdgesDataFrame(edges, paper_count_cutoff, correlation_cutoff) diff --git a/R/utils_annotateProteinInfoFromIndra.R b/R/utils_annotateProteinInfoFromIndra.R index 4fcba0d..b70a5b8 100644 --- a/R/utils_annotateProteinInfoFromIndra.R +++ b/R/utils_annotateProteinInfoFromIndra.R @@ -254,32 +254,43 @@ INDRA_API_URL = "https://discovery.indra.bio" return(res) } -#' Call gilda API to get HGNC IDs from HGNC names -#' @param hgncNames list of hgnc names -#' @return named character vector mapping HGNC names to HGNC IDs +#' Call Gilda API to ground entity text against any namespace +#' +#' Posts each input text to Gilda's `ground_multi` endpoint and returns +#' every grounding candidate per input (in Gilda's ranking order). When +#' `keep_only` is set, candidates whose `term$db` does not match are +#' filtered out. The canonical entity name is taken from `term$entry_name` +#' when present, falling back to `term$text` (the input string). +#' @param textInputs list of character strings to ground +#' @param keep_only optional character; if non-NULL, only candidates whose +#' `term$db == keep_only` are retained +#' @return Named list keyed by input text. Each value is a list with +#' three equal-length character vectors: `ns`, `id`, `name`, +#' positionally aligned across Gilda's returned candidates. 
+#' Texts with no surviving grounding are omitted from the result. #' @importFrom jsonlite toJSON #' @importFrom httr POST add_headers content #' @keywords internal #' @noRd -.callGetHgncIdsFromGildaApi <- function(hgncNames) { - - if (!is.list(hgncNames)) { +.callGroundEntitiesFromGildaApi <- function(textInputs, keep_only = NULL) { + + if (!is.list(textInputs)) { stop("Input must be a list.") } - - if (any(!sapply(hgncNames, is.character))) { - stop("All elements in the list must be character strings representing hgnc names.") + + if (any(!sapply(textInputs, is.character))) { + stop("All elements in the list must be character strings.") } - - if (length(hgncNames) == 0) { + + if (length(textInputs) == 0) { stop("Input list must not be empty.") } - + apiUrl <- file.path("https://grounding.indra.bio/", "ground_multi") - - requestBody <- lapply(hgncNames, function(hgnc_name) { + + requestBody <- lapply(textInputs, function(text_input) { list( - text = hgnc_name, + text = text_input, organisms = list("9606") ) }) @@ -296,27 +307,45 @@ INDRA_API_URL = "https://discovery.indra.bio" message("Error in API call: ", e) NULL }) - + if (is.null(res)) { return(NULL) } - - hgnc_mapping <- character(0) - - for (item in res) { - # Find the term where db == "HGNC" - hgnc_term <- NULL + + grounding_map <- list() + + for (i in seq_along(res)) { + item <- res[[i]] + input_text <- as.character(textInputs[[i]]) + + ns_vec <- character(0) + id_vec <- character(0) + name_vec <- character(0) + for (entry in item) { - if (!is.null(entry$term$db) && entry$term$db == "HGNC") { - hgnc_term <- entry$term - break + term <- entry$term + if (is.null(term) || is.null(term$db) || is.null(term$id)) next + if (!is.null(keep_only) && term$db != keep_only) next + + entry_name <- if (!is.null(term$entry_name) && nzchar(term$entry_name)) { + term$entry_name + } else { + term$text } + + ns_vec <- c(ns_vec, term$db) + id_vec <- c(id_vec, term$id) + name_vec <- c(name_vec, entry_name) } - - # Only add to 
mapping if HGNC term was found - if (!is.null(hgnc_term)) { - hgnc_mapping[hgnc_term$text] <- hgnc_term$id + + if (length(ns_vec) > 0) { + grounding_map[[input_text]] <- list( + ns = ns_vec, + id = id_vec, + name = name_vec + ) } } - return(hgnc_mapping) + + return(grounding_map) } diff --git a/R/utils_cytoscapeNetwork.R b/R/utils_cytoscapeNetwork.R index f15418f..ca14a87 100644 --- a/R/utils_cytoscapeNetwork.R +++ b/R/utils_cytoscapeNetwork.R @@ -238,35 +238,39 @@ rep("#D3D3D3", nrow(nodes)) } - label_col <- if (display_label_type == "hgncName" && - "hgncName" %in% names(nodes)) "hgncName" else "id" - + label_col <- if (display_label_type == "entityName" && + "entityName" %in% names(nodes)) "entityName" else "id" + has_ptm_sites <- if ("Site" %in% names(nodes)) { unique(nodes$id[!is.na(nodes$Site) & trimws(nodes$Site) != ""]) } else { character(0) } - + elements <- list() emitted_prots <- character(0) + # `emitted_cpds` and `node_type = "compound"` below refer to Cytoscape + # grouping containers used to parent PTM satellite nodes around a protein. + # This Cytoscape "compound" concept is UNRELATED to the chemical + # `proteinIdType = "Compound"` analyte type in annotateProteinInfoFromIndra. 
emitted_cpds <- character(0) emitted_ptm_n <- character(0) emitted_ptm_e <- character(0) - + for (i in seq_len(nrow(nodes))) { row <- nodes[i, , drop = FALSE] color <- node_colors[i] has_site <- "Site" %in% names(nodes) && !is.na(row$Site) && trimws(row$Site) != "" - - display_label <- if (label_col == "hgncName" && - !is.na(row$hgncName) && row$hgncName != "") - row$hgncName else row$id - + + display_label <- if (label_col == "entityName" && + !is.na(row$entityName) && row$entityName != "") + row$entityName else row$id + needs_compound <- row$id %in% has_ptm_sites compound_id <- paste0(row$id, "__compound__") - - # Compound container + + # Cytoscape compound container (PTM grouping parent — not a chemical compound) if (needs_compound && !(compound_id %in% emitted_cpds)) { elements <- c(elements, list( list(data = list(id = compound_id, diff --git a/R/utils_getSubnetworkFromIndra.R b/R/utils_getSubnetworkFromIndra.R index 760b1f6..8d5fe19 100644 --- a/R/utils_getSubnetworkFromIndra.R +++ b/R/utils_getSubnetworkFromIndra.R @@ -7,10 +7,13 @@ #' @keywords internal #' @noRd .validateGetSubnetworkFromIndraInput <- function(input, protein_level_data, sources_filter, force_include_other) { - if (!"HgncId" %in% colnames(input)) { - stop("Invalid Input Error: Input must contain a column named 'HgncId'.") + if (!"EntityId" %in% colnames(input) || !"EntityNamespace" %in% colnames(input)) { + stop("Invalid Input Error: Input must contain columns named 'EntityId' and 'EntityNamespace'.") } - num_proteins = length(unique(input$HgncId)) + + ids_split <- unlist(strsplit(as.character(input$EntityId), ";"), use.names = FALSE) + nss_split <- unlist(strsplit(as.character(input$EntityNamespace), ";"), use.names = FALSE) + unique_pairs <- unique(paste(nss_split, ids_split, sep = ":")) + num_proteins = length(unique_pairs) + ifelse(!is.null(force_include_other), length(force_include_other), 0) if (num_proteins >= 400) { stop("Invalid Input Error: INDRA query must contain less than 400 
proteins. Consider lowering your p-value cutoff") @@ -30,30 +33,72 @@ } } +#' Build the list of (namespace, id) groundings for the INDRA Cogex query +#' +#' Splits each row's semicolon-joined \code{EntityNamespace} / \code{EntityId} +#' positionally, fans out each pair into its own grounding node, then appends +#' any \code{force_include_other} entries (parsed as \code{"namespace:id"}), +#' returning the unique set. Extracted from \code{.callIndraCogexApi} to keep +#' the network-free portion unit-testable. +#' @param namespaces character vector aligned with \code{ids} +#' @param ids character vector aligned with \code{namespaces} +#' @param force_include_other optional character vector of +#' \code{"namespace:id"} strings +#' @return list of two-element \code{list(namespace, id)} groundings +#' @keywords internal +#' @noRd +.buildCogexGroundings <- function(namespaces, ids, force_include_other = NULL) { + ns_split <- strsplit(as.character(namespaces), ";") + id_split <- strsplit(as.character(ids), ";") + if (length(ns_split) != length(id_split)) { + stop("EntityNamespace and EntityId must have the same length") + } + + pairs <- list() + for (i in seq_along(ns_split)) { + ns_i <- ns_split[[i]] + id_i <- id_split[[i]] + if (length(ns_i) != length(id_i)) { + stop("EntityNamespace and EntityId entries must be positionally aligned ", + "after splitting on ';' (mismatch at row ", i, ")") + } + for (k in seq_along(ns_i)) { + pairs <- c(pairs, list(list(ns_i[k], id_i[k]))) + } + } + + if (!is.null(force_include_other)) { + for (x in force_include_other) { + parts <- unlist(strsplit(x, ":")) + if (length(parts) != 2) { + stop(paste0("Invalid identifier format: ", x, + ". Expected format is 'namespace:identifier', e.g. 
'HGNC:1234' or 'CHEBI:4911'.")) + } + pairs <- c(pairs, list(list(parts[1], parts[2]))) + } + } + + unique(pairs) +} + #' Call INDRA Cogex API and return response -#' @param hgncIds list of hgnc ids -#' @param force_include_other list of identifiers to include in the network +#' @param namespaces character vector of entity namespaces (semicolon-joined +#' for multi-grounded rows), aligned with \code{ids} +#' @param ids character vector of entity ids (semicolon-joined for +#' multi-grounded rows), aligned with \code{namespaces} +#' @param force_include_other list of \code{"namespace:id"} identifiers to +#' include in the network #' @return list of INDRA statements #' @importFrom jsonlite toJSON #' @importFrom httr POST add_headers content #' @keywords internal #' @noRd -.callIndraCogexApi <- function(hgncIds, force_include_other) { +.callIndraCogexApi <- function(namespaces, ids, force_include_other) { indraCogexUrl <- "https://discovery.indra.bio/api/indra_subnetwork_relations" - hgncIds = unique(hgncIds) - groundings <- lapply(hgncIds, function(x) list("HGNC", x)) - if (!is.null(force_include_other)) { - groundings <- c(groundings, lapply(force_include_other, function(x) { - parts <- unlist(strsplit(x, ":")) - if (length(parts) != 2) { - stop(paste0("Invalid identifier format: ", x, ". Expected format is 'namespace:identifier', e.g. 
'HGNC:1234' or 'CHEBI:4911'.")) - } - list(parts[1], parts[2]) - })) - } - groundings <- list(nodes = groundings) + pairs <- .buildCogexGroundings(namespaces, ids, force_include_other) + groundings <- list(nodes = pairs) groundings <- jsonlite::toJSON(groundings, auto_unbox = TRUE) res <- POST( @@ -155,9 +200,28 @@ if (!is.character(force_include_other)) { stop("force_include_other must be a character vector") } - if ("HgncId" %in% colnames(input) && any(grepl("^HGNC:", force_include_other))) { - hgnc_ids_to_include <- gsub("^HGNC:", "", force_include_other[grepl("^HGNC:", force_include_other)]) - exempt_proteins <- input[input$HgncId %in% hgnc_ids_to_include, ] + if ("EntityId" %in% colnames(input) && "EntityNamespace" %in% colnames(input)) { + fio_pairs <- lapply(force_include_other, function(x) { + parts <- unlist(strsplit(x, ":")) + if (length(parts) == 2) list(ns = parts[1], id = parts[2]) else NULL + }) + fio_pairs <- Filter(Negate(is.null), fio_pairs) + if (length(fio_pairs) > 0) { + row_matches_fio <- vapply(seq_len(nrow(input)), function(i) { + row_ns <- unlist(strsplit(as.character(input$EntityNamespace[i]), ";")) + row_id <- unlist(strsplit(as.character(input$EntityId[i]), ";")) + if (length(row_ns) != length(row_id) || length(row_ns) == 0) { + return(FALSE) + } + for (p in fio_pairs) { + if (any(row_ns == p$ns & row_id == p$id)) return(TRUE) + } + FALSE + }, logical(1)) + exempt_proteins <- input[row_matches_fio, ] + } else { + exempt_proteins <- data.frame() + } } else { exempt_proteins <- data.frame() } @@ -214,6 +278,24 @@ return(input) } +#' Row-level membership check for a (namespace, id) endpoint +#' +#' Splits each row's \code{EntityNamespace}/\code{EntityId} on \code{";"} +#' and returns \code{TRUE} for rows whose grounding list contains the +#' (\code{edge_ns}, \code{edge_id}) pair. Used to map an INDRA edge +#' endpoint back to the original \code{Protein} value. 
+#' @keywords internal +#' @noRd +.rowMatchesEndpoint <- function(input, edge_ns, edge_id) { + row_ns <- strsplit(as.character(input$EntityNamespace), ";") + row_id <- strsplit(as.character(input$EntityId), ";") + vapply(seq_along(row_ns), function(i) { + rns <- row_ns[[i]]; rid <- row_id[[i]] + if (length(rns) != length(rid) || length(rns) == 0) return(FALSE) + any(rns == edge_ns & rid == edge_id) + }, logical(1)) +} + #' Add additional metadata to an edge #' @param edge object representation of an INDRA statement #' @param input filtered groupComparison result @@ -227,24 +309,26 @@ edge$target_id, "@", edge$target_ns, "&format=html", sep = "" ) - - # Convert back to uniprot IDs - matched_rows_source <- input[which(input$HgncId == edge$source_id), ] + + # Map the grounded INDRA endpoint back to the original Protein value. + # Membership-test against each row's semicolon-split (namespace, id) + # pairs, using INDRA's source_ns/target_ns for namespace-aware disambiguation. + matched_rows_source <- input[.rowMatchesEndpoint(input, edge$source_ns, edge$source_id), ] uniprot_ids_source <- unique(matched_rows_source$Protein) if (length(uniprot_ids_source) != 1) { edge$source_uniprot_id <- edge$source_name } else { edge$source_uniprot_id <- uniprot_ids_source } - - matched_rows_target <- input[which(input$HgncId == edge$target_id), ] + + matched_rows_target <- input[.rowMatchesEndpoint(input, edge$target_ns, edge$target_id), ] uniprot_ids_target = unique(matched_rows_target$Protein) if (length(uniprot_ids_target) != 1) { edge$target_uniprot_id <- edge$target_name } else { edge$target_uniprot_id <- uniprot_ids_target } - + return(edge) } @@ -346,15 +430,16 @@ #' @keywords internal #' @noRd .constructNodesDataFrame <- function(input, edges) { - nodes = input[, c("Protein", "HgncName", "Site", "log2FC", "adj.pvalue")] - colnames(nodes) = c("id", "hgncName", "Site", "logFC", "adj.pvalue") - + nodes = input[, c("Protein", "EntityName", "EntityId", "Site", "log2FC", 
"adj.pvalue")] + colnames(nodes) = c("id", "entityName", "entityId", "Site", "logFC", "adj.pvalue") + nodes = nodes[nodes$id %in% c(edges$source, edges$target), ] extra_force_include_other <- setdiff(unique(c(edges$source, edges$target)), nodes$id) if (length(extra_force_include_other) > 0) { extra_nodes <- data.frame( id = extra_force_include_other, - hgncName = NA, + entityName = NA, + entityId = NA, Site = NA, logFC = 0, adj.pvalue = 1, @@ -362,8 +447,8 @@ ) nodes <- rbind(nodes, extra_nodes) } - nodes$hgncName = ifelse(is.na(nodes$hgncName), nodes$id, nodes$hgncName) - + nodes$entityName = ifelse(is.na(nodes$entityName), nodes$id, nodes$entityName) + return(nodes) } diff --git a/inst/extdata/groupComparisonModel.csv b/inst/extdata/groupComparisonModel.csv index 8130e7d..99858e9 100644 --- a/inst/extdata/groupComparisonModel.csv +++ b/inst/extdata/groupComparisonModel.csv @@ -1,11 +1,11 @@ -"Protein","Label","log2FC","SE","Tvalue","DF","pvalue","adj.pvalue","issue","MissingPercentage","ImputationPercentage","UniprotId","HgncId","HgncName","IsTranscriptionFactor","IsKinase","IsPhosphatase" -"O00217","NAT vs T",2.02850314201335,0.436417673580968,4.64807743776446,4,0.00967535217825999,0.0138219316832286,NA,0.141666666666667,0.141666666666667,"O00217","7715","NDUFS8",FALSE,FALSE,FALSE -"O00330","NAT vs T",1.30009407966678,0.132065876574802,9.84428463570911,3,0.00222851705368443,0.00445703410736886,NA,0.208333333333333,0.108333333333333,"O00330","21350","PDHX",FALSE,FALSE,FALSE -"O60313","NAT vs T",0.929964149145019,0.238799176549731,3.89433566137674,4,0.0176257617090352,0.0195841796767058,NA,0.297560975609756,0.297560975609756,"O60313","8140","OPA1",FALSE,FALSE,FALSE -"O60879","NAT vs T",-1.94845111154526,0.169532275875415,-11.4930982993299,4,0.000327186780400002,0.00163593390200001,NA,0.0958333333333333,0.0958333333333333,"O60879","2877","DIAPH2",FALSE,FALSE,FALSE -"O75306","NAT vs 
T",2.47450401575372,0.353085688947952,7.00822517935155,4,0.00218250286400723,0.00445703410736886,NA,0.161111111111111,0.161111111111111,"O75306","7708","NDUFS2",FALSE,FALSE,FALSE -"P05023","NAT vs T",1.8391155141503,0.212205798248373,8.66666005043711,4,0.000975321873486923,0.00325107291162308,NA,0.117021276595745,0.117021276595745,"P05023","799","ATP1A1",FALSE,FALSE,FALSE -"P05067","NAT vs T",0.736001185192708,0.197348639310814,3.72944646470831,4,0.0203066621563015,0.0203066621563015,NA,0.21304347826087,0.21304347826087,"P05067","620","APP",FALSE,FALSE,FALSE -"P05090","NAT vs T",0.568395089497931,0.116751275137348,4.86842725125924,4,0.00822902980865092,0.0137150496810849,NA,0.0416666666666666,0.0416666666666667,"P05090","612","APOD",FALSE,FALSE,FALSE -"P05362","NAT vs T",-2.02229332006306,0.152590499735315,-13.2530748871716,4,0.000187317344272797,0.00163593390200001,NA,0.178260869565217,0.178260869565217,"P05362","5344","ICAM1",FALSE,FALSE,FALSE -"P08574","NAT vs T",0.953072919616727,0.228573640344465,4.16965367564006,4,0.0140351185508285,0.0175438981885356,NA,0.025,0.025,"P08574","2579","CYC1",FALSE,FALSE,FALSE +"Protein","Label","log2FC","SE","Tvalue","DF","pvalue","adj.pvalue","issue","MissingPercentage","ImputationPercentage","UniprotId","EntityNamespace","EntityId","EntityName","IsTranscriptionFactor","IsKinase","IsPhosphatase" +"O00217","NAT vs T",2.02850314201335,0.436417673580968,4.64807743776446,4,0.00967535217825999,0.0138219316832286,NA,0.141666666666667,0.141666666666667,"O00217","HGNC","7715","NDUFS8",FALSE,FALSE,FALSE +"O00330","NAT vs T",1.30009407966678,0.132065876574802,9.84428463570911,3,0.00222851705368443,0.00445703410736886,NA,0.208333333333333,0.108333333333333,"O00330","HGNC","21350","PDHX",FALSE,FALSE,FALSE +"O60313","NAT vs T",0.929964149145019,0.238799176549731,3.89433566137674,4,0.0176257617090352,0.0195841796767058,NA,0.297560975609756,0.297560975609756,"O60313","HGNC","8140","OPA1",FALSE,FALSE,FALSE +"O60879","NAT vs 
T",-1.94845111154526,0.169532275875415,-11.4930982993299,4,0.000327186780400002,0.00163593390200001,NA,0.0958333333333333,0.0958333333333333,"O60879","HGNC","2877","DIAPH2",FALSE,FALSE,FALSE +"O75306","NAT vs T",2.47450401575372,0.353085688947952,7.00822517935155,4,0.00218250286400723,0.00445703410736886,NA,0.161111111111111,0.161111111111111,"O75306","HGNC","7708","NDUFS2",FALSE,FALSE,FALSE +"P05023","NAT vs T",1.8391155141503,0.212205798248373,8.66666005043711,4,0.000975321873486923,0.00325107291162308,NA,0.117021276595745,0.117021276595745,"P05023","HGNC","799","ATP1A1",FALSE,FALSE,FALSE +"P05067","NAT vs T",0.736001185192708,0.197348639310814,3.72944646470831,4,0.0203066621563015,0.0203066621563015,NA,0.21304347826087,0.21304347826087,"P05067","HGNC","620","APP",FALSE,FALSE,FALSE +"P05090","NAT vs T",0.568395089497931,0.116751275137348,4.86842725125924,4,0.00822902980865092,0.0137150496810849,NA,0.0416666666666666,0.0416666666666667,"P05090","HGNC","612","APOD",FALSE,FALSE,FALSE +"P05362","NAT vs T",-2.02229332006306,0.152590499735315,-13.2530748871716,4,0.000187317344272797,0.00163593390200001,NA,0.178260869565217,0.178260869565217,"P05362","HGNC","5344","ICAM1",FALSE,FALSE,FALSE +"P08574","NAT vs T",0.953072919616727,0.228573640344465,4.16965367564006,4,0.0140351185508285,0.0175438981885356,NA,0.025,0.025,"P08574","HGNC","2579","CYC1",FALSE,FALSE,FALSE diff --git a/inst/extdata/groupComparisonModel_compound.csv b/inst/extdata/groupComparisonModel_compound.csv new file mode 100644 index 0000000..3b6d607 --- /dev/null +++ b/inst/extdata/groupComparisonModel_compound.csv @@ -0,0 +1,49 @@ +Protein,Label,log2FC,SE,Tvalue,DF,pvalue,adj.pvalue,issue,MissingPercentage,ImputationPercentage +(2S)-2-[[(2S)-2-azaniumyl-3-phenylpropanoyl]amino]-3-phenylpropanoate,M1_vs_PA14,-1.18088373155594,0.146099220667978,-8.08275175019303,5,0.000469673968447104,0.00107836227834688,,0,0 
+(2S)-2-[[(2S)-2-azaniumyl-4-methylpentanoyl]amino]-3-phenylpropanoate,M1_vs_PA14,-1.12056521740227,0.0290023011814112,-38.6371140135767,5,2.18861059719799e-07,2.62772673612625e-06,,0,0 +1-(isoquinolin-1-yl)cyclopropane-1-carboxylic acid,M1_vs_PA14,0.215723441232625,0.0152312166907122,14.1632441854871,5,3.15914149648933e-05,0.000105584782285149,,0,0 +1103336-91-9,M1_vs_PA14,0.29022605767552,0.0183287109373434,15.8345046014232,5,1.82767930270167e-05,6.76851765342366e-05,,0,0 +14705-60-3,M1_vs_PA14,0.123683287910055,0.0419301881992021,2.94974320941417,5,0.0318948651930595,0.0471543693246409,,0,0 +2-(5-methyl-1H-pyrazol-1-yl)propanoic acid,M1_vs_PA14,1.29620904556055,0.0454368556041306,28.5277013192504,5,9.91446542641938e-07,6.74684741111933e-06,,0,0 +2-heptylquinolin-4(1H)-one,M1_vs_PA14,-1.7150682256616,0.21784270906258,-7.8729659259283,5,0.000531283313542241,0.00120368875724414,,0,0 +2-Nonylquinolin-4(1h)-One,M1_vs_PA14,-3.65327716069814,0.17296582128874,-21.121381863065,5,4.40877520935246e-06,2.12545812522491e-05,,0,0 +2-Pentyl-1H-quinolin-4-one,M1_vs_PA14,-0.84528232620635,0.112777110911726,-7.49515854212634,5,0.000668256357600372,0.00147329033225345,,0,0 +2'-Aminoacetophenone,M1_vs_PA14,1.37542309134924,0.0572473144102815,24.0259845464858,5,2.32739756800271e-06,1.31894981689571e-05,,0,0 +3-Acetamidophenol,M1_vs_PA14,4.29849973415301,0.0351470347994133,122.300494442415,5,6.93189061706789e-10,1.40985756402756e-07,,0,0 +3-phenyl-2-[(pyridin-3-yl)formamido]propanoic acid,M1_vs_PA14,0.269827956821194,0.0412355951766881,6.54356886726196,5,0.00124815203594908,0.00256852247572491,,0,0 +AKOS011963716,M1_vs_PA14,-0.478044553848572,0.113143320504422,-4.22512395532788,5,0.00828693958078253,0.0142266676773908,,0,0 +AKOS016387022,M1_vs_PA14,-3.24410877003438,0.138832945549497,-23.3669951839911,5,2.67180689017543e-06,1.45559421617939e-05,,0,0 +Ala-phe,M1_vs_PA14,-4.18501154087428,0.0492289191587406,-85.0112416114507,5,4.26853086210599e-09,4.47010037503877e-07,,0,0 
+Benzododecinium,M1_vs_PA14,1.88449732437776,0.665563970098395,2.83142929762133,5,0.036613169830235,0.0531299654580392,,0,0 +BET-IN-1,M1_vs_PA14,1.31205074794343,0.129256392488161,10.1507610005718,5,0.000159103397604721,0.000419454411866991,,0,0 +cetrimonium,M1_vs_PA14,0.100083852905068,0.0885572669084264,1.13015968535434,5,0.309689775292058,0.351031404946199,,0,0 +CID 6993119,M1_vs_PA14,-2.49118038173656,0.0418945349364344,-59.4631348818258,5,2.54536323129884e-08,9.61484000434261e-07,,0,0 +Cyclo-Ile-Pro-diketopiperazine,M1_vs_PA14,0.0955461855057725,0.0320052515711818,2.98532837004176,5,0.0306114868933012,0.0454351596802147,,0,0 +Cyclo(his-pro),M1_vs_PA14,0.674821765798337,0.0233600852522471,28.8878126304536,5,9.3147400193061e-07,6.60085899864361e-06,,0,0 +Cyclo(Tyr-Leu),M1_vs_PA14,0.0999739536883055,0.0576244552940825,1.73492232036026,5,0.143279551550854,0.178154323663166,,0,0 +Dihydroaeruginoic acid,M1_vs_PA14,3.65064773332069,0.0697408402810589,52.3459097798136,5,4.81053552547905e-08,1.19034615366574e-06,,0,0 +diprotin A,M1_vs_PA14,-0.06727816456575,0.0394538765812182,-1.70523584487962,5,0.148868555410736,0.18461659667713,,0,0 +Glycyl-L-leucine,M1_vs_PA14,-4.43122450201096,0.14220446642269,-31.1609375815212,5,6.38956185738948e-07,4.99764485526107e-06,,0,0 +H-gamma-glu-phe-oh,M1_vs_PA14,-2.58694885308984,0.0346076106187456,-74.7508656864219,5,8.1169027055239e-09,6.1442565539384e-07,,0,0 +HQNO,M1_vs_PA14,0.266579100666808,0.0999610386698175,2.66683003912503,5,0.0445174652278542,0.0629995660319107,,0,0 +l-alanyl-l-leucine,M1_vs_PA14,-1.5882826308944,0.109224878997931,-14.5413997750868,5,2.77661174963306e-05,9.48172671749696e-05,,0,0 +L-Alloisoleucine,M1_vs_PA14,-3.79798205129088,0.0175326177438212,-216.623786977238,5,3.97808452845538e-11,5.68208091689115e-08,,0,0 +L-arginine,M1_vs_PA14,0.19611790498701,0.0970827717018769,2.02011027856983,5,0.0993563098829096,0.128630936901981,,0,0 
+L-Leucyl-L-alanine,M1_vs_PA14,-0.18836690147153,0.193137675063248,-0.975298586409122,5,0.374204666106239,0.417135301957576,,0,0 +L-methionine,M1_vs_PA14,-2.03199382318956,0.0395550960032599,-51.3712271870633,5,5.28375352359944e-08,1.24498442399812e-06,,0,0 +L-phenylalanine,M1_vs_PA14,-4.1474085488916,0.222024609412162,-18.6799497581479,5,8.09434835913336e-06,3.41338851386273e-05,,0,0 +L-Pyroglutamic acid,M1_vs_PA14,-4.84738990445273,0.290888613113778,-16.6640758212035,5,1.42162642879917e-05,5.48009369792728e-05,,0,0 +L-tryptophan,M1_vs_PA14,2.02816882176998,0.0678818690780375,29.877916582385,5,7.87687370218393e-07,5.75062968087929e-06,,0,0 +L-tyrosine,M1_vs_PA14,-2.12555702654781,0.103390413769867,-20.5585503437389,5,5.0395831752148e-06,2.34558377414319e-05,,0,0 +L-Val-L-Ala,M1_vs_PA14,-0.6617051111417,0.243155083849873,-2.72132953448855,5,0.0417088076445098,0.0596971164843591,,0,0 +methyl 2-[(pyrrolidin-2-yl)formamido]butanoate hydrochloride,M1_vs_PA14,-2.33945006925546,0.0515869465757631,-45.349651889546,5,9.84405823523105e-08,1.79303154157484e-06,,0,0 +N-Acetyl-DL-methionine,M1_vs_PA14,2.1295621519727,0.0165093517655868,128.991263994489,5,5.31156896244056e-10,1.25153843677506e-07,,0,0 +nicotinic acid,M1_vs_PA14,0.423973883173266,0.0376140107686597,11.2717010100535,5,9.60312428732735e-05,0.000269774803004651,,0,0 +octopamine,M1_vs_PA14,-2.99837622058394,0.0549854917283007,-54.5303156585338,5,3.92238508339915e-08,1.07155012785614e-06,,0,0 +Oleamide,M1_vs_PA14,0.555668395711015,0.164063006338366,3.38692072096373,5,0.0195265818141697,0.0303693124750081,,0,0 +pantothenic acid,M1_vs_PA14,-1.14745466433813,0.0381718160417632,-30.060258675739,5,7.64195672298129e-07,5.62698766516396e-06,,0,0 +Phe-ala,M1_vs_PA14,-1.72574801580392,0.0233851278633422,-73.7968176136914,5,8.6548919142615e-09,6.1442565539384e-07,,0,0 +Polidocanol,M1_vs_PA14,-0.267578759231152,0.113671655842747,-2.35396200791972,5,0.0652382765177613,0.0887259388426984,,0,0 +TRIBUTYL 
PHOSPHATE,M1_vs_PA14,-0.0458632707970992,0.0908651477767996,-0.504739957169908,5,0.635194450206758,0.668905887508234,,0,0 +Trospium,M1_vs_PA14,1.38982839940951,0.124905744481921,11.1270174576373,5,0.000102221670718716,0.00028504119719642,,0,0 +Val-Tyr,M1_vs_PA14,-0.593778662093816,0.61995827401643,-0.957771977534862,5,0.38216422091633,0.425504758669393,,0,0 diff --git a/man/annotateProteinInfoFromIndra.Rd b/man/annotateProteinInfoFromIndra.Rd index 15c0fc6..fa3e3ec 100644 --- a/man/annotateProteinInfoFromIndra.Rd +++ b/man/annotateProteinInfoFromIndra.Rd @@ -7,28 +7,46 @@ annotateProteinInfoFromIndra(df, proteinIdType) } \arguments{ -\item{df}{output of \code{\link[MSstats]{groupComparison}} function's -comparisonResult table, which contains a list of proteins and their -corresponding p-values, logFCs, along with additional HGNC ID and HGNC -name columns} +\item{df}{output of \code{\link[MSstats]{groupComparison}} function's +comparisonResult table. Must contain a \code{Protein} column whose +values are interpreted according to \code{proteinIdType}.} -\item{proteinIdType}{A character string specifying the type of protein ID. -It can be either "Uniprot", "Uniprot_Mnemonic", or "Hgnc_Name".} +\item{proteinIdType}{A character string specifying the type of analyte +identifier in the \code{Protein} column. One of +\code{"Uniprot"}, \code{"Uniprot_Mnemonic"}, \code{"Hgnc_Name"}, or +\code{"Compound"}. The \code{"Compound"} value treats inputs as +metabolite names and grounds them through Gilda, keeping whatever +namespace Gilda returns (CHEBI / PUBCHEM / CHEMBL / ...).} } \value{ A data frame with the following columns: \describe{ - \item{Protein}{Character. The original protein identifier.} - \item{UniprotID}{Character. The Uniprot ID of the protein.} - \item{HgncID}{Character. The HGNC ID of the protein.} - \item{HgncName}{Character. The HGNC name of the protein.} - \item{IsTranscriptionFactor}{Logical. 
Indicates if the protein is a transcription factor.} - \item{IsKinase}{Logical. Indicates if the protein is a kinase.} - \item{IsPhosphatase}{Logical. Indicates if the protein is a phosphatase.} + \item{Protein}{Character. The original identifier from the input.} + \item{GlobalProtein}{Character. The input identifier with the + MSstats mnemonic suffix stripped, used as the grounding key.} + \item{UniprotId}{Character. The Uniprot ID of the protein, or + \code{NA} for \code{"Hgnc_Name"} and \code{"Compound"} inputs.} + \item{EntityNamespace}{Character. The grounding namespace + (e.g. \code{"HGNC"}, \code{"CHEBI"}). When a single input grounds + to multiple candidates, namespaces are semicolon-joined and + positionally aligned with \code{EntityId} and \code{EntityName}.} + \item{EntityId}{Character. The bare grounding identifier within its + namespace (e.g. \code{"1097"} for HGNC, \code{"28748"} for + CHEBI). Semicolon-joined when multi-grounded.} + \item{EntityName}{Character. The canonical display name from the + grounding source. Semicolon-joined when multi-grounded.} + \item{IsTranscriptionFactor}{Logical. \code{NA} for + \code{proteinIdType == "Compound"}.} + \item{IsKinase}{Logical. \code{NA} for + \code{proteinIdType == "Compound"}.} + \item{IsPhosphatase}{Logical. \code{NA} for + \code{proteinIdType == "Compound"}.} } } \description{ -This function annotates a data frame with protein information from Indra. +This function annotates a data frame with entity (protein or compound) +grounding information from INDRA / Gilda, plus gene-only flags +(transcription factor / kinase / phosphatase) for the protein paths. } \examples{ df <- data.frame(Protein = c("CLH1_HUMAN")) diff --git a/man/cytoscapeNetwork.Rd b/man/cytoscapeNetwork.Rd index 4b0863b..8cae6be 100644 --- a/man/cytoscapeNetwork.Rd +++ b/man/cytoscapeNetwork.Rd @@ -17,15 +17,16 @@ cytoscapeNetwork( } \arguments{ \item{nodes}{Data frame with at minimum an \code{id} column. 
Optional -columns: \code{logFC} (numeric), \code{hgncName} -(character), \code{Site} (character, underscore-separated -PTM site list).} +columns: \code{logFC} (numeric), \code{entityName} +(character; may be semicolon-joined for multi-grounded +rows), \code{entityId} (character), \code{Site} +(character, underscore-separated PTM site list).} \item{edges}{Data frame with columns \code{source}, \code{target}, \code{interaction}. Optional: \code{site}, \code{evidenceLink}.} -\item{displayLabelType}{\code{"id"} (default) or \code{"hgncName"} – +\item{displayLabelType}{\code{"id"} (default) or \code{"entityName"} – controls which column is used as the visible node label.} \item{nodeFontSize}{Font size (px) for node labels. Default \code{12}.} diff --git a/man/dot-populateEntityIdsInDataFrame.Rd b/man/dot-populateEntityIdsInDataFrame.Rd new file mode 100644 index 0000000..48776fe --- /dev/null +++ b/man/dot-populateEntityIdsInDataFrame.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/annotateProteinInfoFromIndra.R +\name{.populateEntityIdsInDataFrame} +\alias{.populateEntityIdsInDataFrame} +\title{Populate Entity IDs and namespaces in Data Frame} +\usage{ +.populateEntityIdsInDataFrame(df, proteinIdType) +} +\arguments{ +\item{df}{A data frame containing protein information.} + +\item{proteinIdType}{A character string specifying the type of protein ID.} +} +\value{ +A data frame with populated entity grounding columns. +} +\description{ +Sets \code{EntityNamespace} and \code{EntityId}. For Gilda-sourced rows +(\code{"Hgnc_Name"}, \code{"Compound"}) also sets \code{EntityName} from +the same response, avoiding a second name lookup. 
+} diff --git a/man/dot-populateEntityNamesInDataFrame.Rd b/man/dot-populateEntityNamesInDataFrame.Rd new file mode 100644 index 0000000..a804101 --- /dev/null +++ b/man/dot-populateEntityNamesInDataFrame.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/annotateProteinInfoFromIndra.R +\name{.populateEntityNamesInDataFrame} +\alias{.populateEntityNamesInDataFrame} +\title{Populate Entity Names in Data Frame} +\usage{ +.populateEntityNamesInDataFrame(df) +} +\arguments{ +\item{df}{A data frame containing protein information.} +} +\value{ +A data frame with populated entity names. +} +\description{ +Fills \code{EntityName} for rows whose name was not set by the IDs step. +In practice this covers the UniProt / Uniprot_Mnemonic paths, where +\code{EntityId} is a single bare HGNC id; the HGNC names API is queried. +Gilda-sourced rows already have \code{EntityName} populated and are +skipped. +} diff --git a/man/dot-populateHgncIdsInDataFrame.Rd b/man/dot-populateHgncIdsInDataFrame.Rd deleted file mode 100644 index 6f2e953..0000000 --- a/man/dot-populateHgncIdsInDataFrame.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/annotateProteinInfoFromIndra.R -\name{.populateHgncIdsInDataFrame} -\alias{.populateHgncIdsInDataFrame} -\title{Populate HGNC IDs in Data Frame} -\usage{ -.populateHgncIdsInDataFrame(df, proteinIdType) -} -\arguments{ -\item{df}{A data frame containing protein information.} - -\item{proteinIdType}{A character string specifying the type of protein ID. -It can be either "Uniprot", "Uniprot_Mnemonic", or "Hgnc_Name".} -} -\value{ -A data frame with populated HGNC IDs. -} -\description{ -This function populates the HGNC IDs in the data frame based on the Uniprot IDs. 
-} diff --git a/man/dot-populateHgncNamesInDataFrame.Rd b/man/dot-populateHgncNamesInDataFrame.Rd deleted file mode 100644 index 23d91d8..0000000 --- a/man/dot-populateHgncNamesInDataFrame.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/annotateProteinInfoFromIndra.R -\name{.populateHgncNamesInDataFrame} -\alias{.populateHgncNamesInDataFrame} -\title{Populate HGNC Names in Data Frame} -\usage{ -.populateHgncNamesInDataFrame(df) -} -\arguments{ -\item{df}{A data frame containing protein information.} -} -\value{ -A data frame with populated HGNC names. -} -\description{ -This function populates the HGNC names in the data frame based on the HGNC IDs. -} diff --git a/man/dot-populateKinaseInfoInDataFrame.Rd b/man/dot-populateKinaseInfoInDataFrame.Rd index aa8dddd..a1bdf63 100644 --- a/man/dot-populateKinaseInfoInDataFrame.Rd +++ b/man/dot-populateKinaseInfoInDataFrame.Rd @@ -4,14 +4,17 @@ \alias{.populateKinaseInfoInDataFrame} \title{Populate Kinase Info in Data Frame} \usage{ -.populateKinaseInfoInDataFrame(df) +.populateKinaseInfoInDataFrame(df, proteinIdType) } \arguments{ \item{df}{A data frame containing protein information.} + +\item{proteinIdType}{The proteinIdType supplied by the caller. Gene-only +flags are \code{NA} (no API call) when this is \code{"Compound"}.} } \value{ A data frame with populated kinase information. } \description{ -This function populates the kinase information in the data frame based on the HGNC names. 
+Populate Kinase Info in Data Frame } diff --git a/man/dot-populatePhophataseInfoInDataFrame.Rd b/man/dot-populatePhophataseInfoInDataFrame.Rd index e0cd5b5..ee0a41f 100644 --- a/man/dot-populatePhophataseInfoInDataFrame.Rd +++ b/man/dot-populatePhophataseInfoInDataFrame.Rd @@ -4,14 +4,17 @@ \alias{.populatePhophataseInfoInDataFrame} \title{Populate Phosphatase Info in Data Frame} \usage{ -.populatePhophataseInfoInDataFrame(df) +.populatePhophataseInfoInDataFrame(df, proteinIdType) } \arguments{ \item{df}{A data frame containing protein information.} + +\item{proteinIdType}{The proteinIdType supplied by the caller. Gene-only +flags are \code{NA} (no API call) when this is \code{"Compound"}.} } \value{ A data frame with populated phosphatase information. } \description{ -This function populates the phosphatase information in the data frame based on the HGNC names. +Populate Phosphatase Info in Data Frame } diff --git a/man/dot-populateTranscriptionFactorInfoInDataFrame.Rd b/man/dot-populateTranscriptionFactorInfoInDataFrame.Rd index e54c6f3..099fcb9 100644 --- a/man/dot-populateTranscriptionFactorInfoInDataFrame.Rd +++ b/man/dot-populateTranscriptionFactorInfoInDataFrame.Rd @@ -4,14 +4,17 @@ \alias{.populateTranscriptionFactorInfoInDataFrame} \title{Populate Transcription Factor Info in Data Frame} \usage{ -.populateTranscriptionFactorInfoInDataFrame(df) +.populateTranscriptionFactorInfoInDataFrame(df, proteinIdType) } \arguments{ \item{df}{A data frame containing protein information.} + +\item{proteinIdType}{The proteinIdType supplied by the caller. Gene-only +flags are \code{NA} (no API call) when this is \code{"Compound"}.} } \value{ A data frame with populated transcription factor information. } \description{ -This function populates the transcription factor information in the data frame based on the HGNC names. 
+Populate Transcription Factor Info in Data Frame } diff --git a/man/dot-populateUniprotIdsInDataFrame.Rd b/man/dot-populateUniprotIdsInDataFrame.Rd index 43bee6e..4d26321 100644 --- a/man/dot-populateUniprotIdsInDataFrame.Rd +++ b/man/dot-populateUniprotIdsInDataFrame.Rd @@ -9,12 +9,11 @@ \arguments{ \item{df}{A data frame containing protein information.} -\item{proteinIdType}{A character string specifying the type of protein ID. -It can be either "Uniprot" or "Uniprot_Mnemonic".} +\item{proteinIdType}{A character string specifying the type of protein ID.} } \value{ A data frame with populated Uniprot IDs. } \description{ -This function populates the Uniprot IDs in the data frame based on the protein ID type. +Populate Uniprot IDs in Data Frame } diff --git a/man/dot-validateAnnotateProteinInfoFromIndraInput.Rd b/man/dot-validateAnnotateProteinInfoFromIndraInput.Rd index e9f9e2a..83d8e33 100644 --- a/man/dot-validateAnnotateProteinInfoFromIndraInput.Rd +++ b/man/dot-validateAnnotateProteinInfoFromIndraInput.Rd @@ -4,14 +4,16 @@ \alias{.validateAnnotateProteinInfoFromIndraInput} \title{Validate Annotate Protein Info Input} \usage{ -.validateAnnotateProteinInfoFromIndraInput(df) +.validateAnnotateProteinInfoFromIndraInput(df, proteinIdType) } \arguments{ \item{df}{A data frame containing protein information.} + +\item{proteinIdType}{The proteinIdType supplied by the caller.} } \value{ None. Throws an error if validation fails. } \description{ -This function validates the input data frame for the annotateProteinInfoFromIndra function. +Validate Annotate Protein Info Input } diff --git a/man/exportNetworkToHTML.Rd b/man/exportNetworkToHTML.Rd index 8e6521e..01a2d40 100644 --- a/man/exportNetworkToHTML.Rd +++ b/man/exportNetworkToHTML.Rd @@ -15,9 +15,10 @@ exportNetworkToHTML( } \arguments{ \item{nodes}{Data frame with at minimum an \code{id} column. 
Optional -columns: \code{logFC} (numeric), \code{hgncName} -(character), \code{Site} (character, underscore-separated -PTM site list).} +columns: \code{logFC} (numeric), \code{entityName} +(character; may be semicolon-joined for multi-grounded +rows), \code{entityId} (character), \code{Site} +(character, underscore-separated PTM site list).} \item{edges}{Data frame with columns \code{source}, \code{target}, \code{interaction}. Optional: \code{site}, @@ -25,7 +26,7 @@ PTM site list).} \item{filename}{Output HTML filename} -\item{displayLabelType}{\code{"id"} (default) or \code{"hgncName"} – +\item{displayLabelType}{\code{"id"} (default) or \code{"entityName"} – controls which column is used as the visible node label.} \item{nodeFontSize}{Font size (px) for node labels. Default \code{12}.} diff --git a/man/getSubnetworkFromIndra.Rd b/man/getSubnetworkFromIndra.Rd index 4f9ff50..b66a271 100644 --- a/man/getSubnetworkFromIndra.Rd +++ b/man/getSubnetworkFromIndra.Rd @@ -22,10 +22,13 @@ getSubnetworkFromIndra( ) } \arguments{ -\item{input}{output of \code{\link[MSstats]{groupComparison}} function's -comparisionResult table, which contains a list of proteins and their -corresponding p-values, logFCs, along with additional HGNC ID and HGNC -name columns} +\item{input}{output of \code{\link[MSstats]{groupComparison}} function's +comparisionResult table, annotated by +\code{\link{annotateProteinInfoFromIndra}}. Must contain \code{Protein}, +\code{EntityNamespace}, and \code{EntityId} columns (and typically also +\code{EntityName}, \code{log2FC}, \code{adj.pvalue}). 
When an analyte +grounds to multiple candidates the three \code{Entity*} columns are +semicolon-joined and positionally aligned.} \item{protein_level_data}{output of the \code{\link[MSstats]{dataProcess}} function's ProteinLevelData table, which contains a list of proteins and diff --git a/man/previewNetworkInBrowser.Rd b/man/previewNetworkInBrowser.Rd index c9ea22e..616e38f 100644 --- a/man/previewNetworkInBrowser.Rd +++ b/man/previewNetworkInBrowser.Rd @@ -13,15 +13,16 @@ previewNetworkInBrowser( } \arguments{ \item{nodes}{Data frame with at minimum an \code{id} column. Optional -columns: \code{logFC} (numeric), \code{hgncName} -(character), \code{Site} (character, underscore-separated -PTM site list).} +columns: \code{logFC} (numeric), \code{entityName} +(character; may be semicolon-joined for multi-grounded +rows), \code{entityId} (character), \code{Site} +(character, underscore-separated PTM site list).} \item{edges}{Data frame with columns \code{source}, \code{target}, \code{interaction}. Optional: \code{site}, \code{evidenceLink}.} -\item{displayLabelType}{\code{"id"} (default) or \code{"hgncName"} – +\item{displayLabelType}{\code{"id"} (default) or \code{"entityName"} – controls which column is used as the visible node label.} \item{nodeFontSize}{Font size (px) for node labels. 
Default \code{12}.} diff --git a/tests/testthat/test-annotateProteinInfoFromIndra.R b/tests/testthat/test-annotateProteinInfoFromIndra.R index c4d201c..bfce822 100644 --- a/tests/testthat/test-annotateProteinInfoFromIndra.R +++ b/tests/testthat/test-annotateProteinInfoFromIndra.R @@ -1,26 +1,29 @@ test_that("annotateProteinInfoFromIndra works correctly with Uniprot_Mnemonic", { df <- data.frame(Protein = c("CLH1_HUMAN")) annotated_df <- annotateProteinInfoFromIndra(df, "Uniprot_Mnemonic") - + expect_true("Protein" %in% colnames(annotated_df)) expect_true("UniprotId" %in% colnames(annotated_df)) - expect_true("HgncId" %in% colnames(annotated_df)) - expect_true("HgncName" %in% colnames(annotated_df)) + expect_true("EntityNamespace" %in% colnames(annotated_df)) + expect_true("EntityId" %in% colnames(annotated_df)) + expect_true("EntityName" %in% colnames(annotated_df)) expect_true("IsTranscriptionFactor" %in% colnames(annotated_df)) expect_true("IsKinase" %in% colnames(annotated_df)) expect_true("IsPhosphatase" %in% colnames(annotated_df)) expect_false(is.na(annotated_df$UniprotId)) - expect_false(is.na(annotated_df$HgncId)) - expect_false(is.na(annotated_df$HgncName)) + expect_false(is.na(annotated_df$EntityNamespace)) + expect_false(is.na(annotated_df$EntityId)) + expect_false(is.na(annotated_df$EntityName)) expect_false(is.na(annotated_df$IsTranscriptionFactor)) expect_false(is.na(annotated_df$IsKinase)) expect_false(is.na(annotated_df$IsPhosphatase)) - + expect_equal(annotated_df$Protein, "CLH1_HUMAN") expect_equal(annotated_df$UniprotId, "Q00610") - expect_equal(annotated_df$HgncId, "2092") - expect_equal(annotated_df$HgncName, "CLTC") + expect_equal(annotated_df$EntityNamespace, "HGNC") + expect_equal(annotated_df$EntityId, "2092") + expect_equal(annotated_df$EntityName, "CLTC") expect_equal(annotated_df$IsTranscriptionFactor, FALSE) expect_equal(annotated_df$IsKinase, FALSE) expect_equal(annotated_df$IsPhosphatase, FALSE) @@ -32,29 +35,40 @@ 
test_that("annotateProteinInfoFromIndra throws error for missing Protein column" expect_error(annotateProteinInfoFromIndra(df, "Uniprot_Mnemonic"), "Input dataframe must contain 'Protein' column.") }) +test_that("annotateProteinInfoFromIndra throws error for invalid proteinIdType", { + df <- data.frame(Protein = c("CLH1_HUMAN")) + expect_error( + annotateProteinInfoFromIndra(df, "NotAType"), + "Invalid proteinIdType" + ) +}) + test_that("annotateProteinInfoFromIndra works correctly with Uniprot", { df <- data.frame(Protein = c("Q00610")) annotated_df <- annotateProteinInfoFromIndra(df, "Uniprot") - + expect_true("Protein" %in% colnames(annotated_df)) expect_true("UniprotId" %in% colnames(annotated_df)) - expect_true("HgncId" %in% colnames(annotated_df)) - expect_true("HgncName" %in% colnames(annotated_df)) + expect_true("EntityNamespace" %in% colnames(annotated_df)) + expect_true("EntityId" %in% colnames(annotated_df)) + expect_true("EntityName" %in% colnames(annotated_df)) expect_true("IsTranscriptionFactor" %in% colnames(annotated_df)) expect_true("IsKinase" %in% colnames(annotated_df)) expect_true("IsPhosphatase" %in% colnames(annotated_df)) expect_false(is.na(annotated_df$UniprotId)) - expect_false(is.na(annotated_df$HgncId)) - expect_false(is.na(annotated_df$HgncName)) + expect_false(is.na(annotated_df$EntityNamespace)) + expect_false(is.na(annotated_df$EntityId)) + expect_false(is.na(annotated_df$EntityName)) expect_false(is.na(annotated_df$IsTranscriptionFactor)) expect_false(is.na(annotated_df$IsKinase)) expect_false(is.na(annotated_df$IsPhosphatase)) - + expect_equal(annotated_df$Protein, "Q00610") expect_equal(annotated_df$UniprotId, "Q00610") - expect_equal(annotated_df$HgncId, "2092") - expect_equal(annotated_df$HgncName, "CLTC") + expect_equal(annotated_df$EntityNamespace, "HGNC") + expect_equal(annotated_df$EntityId, "2092") + expect_equal(annotated_df$EntityName, "CLTC") expect_equal(annotated_df$IsTranscriptionFactor, FALSE) 
expect_equal(annotated_df$IsKinase, FALSE) expect_equal(annotated_df$IsPhosphatase, FALSE) @@ -63,49 +77,54 @@ test_that("annotateProteinInfoFromIndra works correctly with Uniprot", { test_that("annotateProteinInfoFromIndra returns NA for unknown protein id", { df <- data.frame(Protein = c("ABC")) annotated_df <- annotateProteinInfoFromIndra(df, "Uniprot_Mnemonic") - + expect_true("Protein" %in% colnames(annotated_df)) expect_true("UniprotId" %in% colnames(annotated_df)) - expect_true("HgncId" %in% colnames(annotated_df)) - expect_true("HgncName" %in% colnames(annotated_df)) + expect_true("EntityNamespace" %in% colnames(annotated_df)) + expect_true("EntityId" %in% colnames(annotated_df)) + expect_true("EntityName" %in% colnames(annotated_df)) expect_true("IsTranscriptionFactor" %in% colnames(annotated_df)) expect_true("IsKinase" %in% colnames(annotated_df)) expect_true("IsPhosphatase" %in% colnames(annotated_df)) expect_true(is.na(annotated_df$UniprotId)) - expect_true(is.na(annotated_df$HgncId)) - expect_true(is.na(annotated_df$HgncName)) + expect_true(is.na(annotated_df$EntityNamespace)) + expect_true(is.na(annotated_df$EntityId)) + expect_true(is.na(annotated_df$EntityName)) expect_true(is.na(annotated_df$IsTranscriptionFactor)) expect_true(is.na(annotated_df$IsKinase)) expect_true(is.na(annotated_df$IsPhosphatase)) - + expect_equal(annotated_df$Protein, "ABC") }) test_that("annotateProteinInfoFromIndra works correctly with HGNC name", { df <- data.frame(Protein = c("EGFR")) annotated_df <- annotateProteinInfoFromIndra(df, "Hgnc_Name") - + expect_true("Protein" %in% colnames(annotated_df)) expect_true("UniprotId" %in% colnames(annotated_df)) - expect_true("HgncId" %in% colnames(annotated_df)) - expect_true("HgncName" %in% colnames(annotated_df)) + expect_true("EntityNamespace" %in% colnames(annotated_df)) + expect_true("EntityId" %in% colnames(annotated_df)) + expect_true("EntityName" %in% colnames(annotated_df)) expect_true("IsTranscriptionFactor" %in% 
colnames(annotated_df)) expect_true("IsKinase" %in% colnames(annotated_df)) expect_true("IsPhosphatase" %in% colnames(annotated_df)) - + expect_true(is.na(annotated_df$UniprotId)) - expect_false(is.na(annotated_df$HgncId)) - expect_false(is.na(annotated_df$HgncName)) + expect_false(is.na(annotated_df$EntityNamespace)) + expect_false(is.na(annotated_df$EntityId)) + expect_false(is.na(annotated_df$EntityName)) expect_false(is.na(annotated_df$IsTranscriptionFactor)) expect_false(is.na(annotated_df$IsKinase)) expect_false(is.na(annotated_df$IsPhosphatase)) - + expect_equal(annotated_df$Protein, "EGFR") - expect_equal(annotated_df$HgncId, "3236") - expect_equal(annotated_df$HgncName, "EGFR") + expect_equal(annotated_df$EntityNamespace, "HGNC") + expect_equal(annotated_df$EntityId, "3236") + expect_equal(annotated_df$EntityName, "EGFR") expect_type(annotated_df$IsTranscriptionFactor, "logical") expect_type(annotated_df$IsKinase, "logical") expect_type(annotated_df$IsPhosphatase, "logical") - -}) \ No newline at end of file + +}) diff --git a/tests/testthat/test-exportNetworkToHTML.R b/tests/testthat/test-exportNetworkToHTML.R index 3a49a29..cb85cd2 100644 --- a/tests/testthat/test-exportNetworkToHTML.R +++ b/tests/testthat/test-exportNetworkToHTML.R @@ -66,14 +66,14 @@ test_that("exportNetworkToHTML passes nodeFontSize through to the widget", { test_that("exportNetworkToHTML passes displayLabelType through to the widget", { save_widget_mock <- mock() stub(exportNetworkToHTML, "htmlwidgets::saveWidget", save_widget_mock) - - nodes_hgnc <- make_nodes() - nodes_hgnc$hgncName <- c("TP53", "MDM2") - - exportNetworkToHTML(nodes_hgnc, make_edges(), + + nodes_entity <- make_nodes() + nodes_entity$entityName <- c("TP53", "MDM2") + + exportNetworkToHTML(nodes_entity, make_edges(), filename = tempfile(fileext = ".html"), - displayLabelType = "hgncName") - + displayLabelType = "entityName") + widget_arg <- mock_args(save_widget_mock)[[1]][[1]] protein_nodes <- Filter(function(el) 
!is.null(el$data$node_type) && el$data$node_type == "protein", @@ -150,15 +150,15 @@ test_that("previewNetworkInBrowser passes nodeFontSize and displayLabelType thro export_mock <- mock(invisible(NULL)) stub(previewNetworkInBrowser, "exportNetworkToHTML", export_mock) stub(previewNetworkInBrowser, "interactive", mock(FALSE)) - - nodes_hgnc <- make_nodes() - nodes_hgnc$hgncName <- c("TP53", "MDM2") - - previewNetworkInBrowser(nodes_hgnc, make_edges(), - displayLabelType = "hgncName", + + nodes_entity <- make_nodes() + nodes_entity$entityName <- c("TP53", "MDM2") + + previewNetworkInBrowser(nodes_entity, make_edges(), + displayLabelType = "entityName", nodeFontSize = 16) - + call_args <- mock_args(export_mock)[[1]] - expect_equal(call_args$displayLabelType, "hgncName") + expect_equal(call_args$displayLabelType, "entityName") expect_equal(call_args$nodeFontSize, 16) }) \ No newline at end of file diff --git a/tests/testthat/test-getSubnetworkFromIndra.R b/tests/testthat/test-getSubnetworkFromIndra.R index ddeca35..5207ada 100644 --- a/tests/testthat/test-getSubnetworkFromIndra.R +++ b/tests/testthat/test-getSubnetworkFromIndra.R @@ -2,7 +2,7 @@ test_that("getSubnetworkFromIndra works correctly", { input <- data.table::fread( system.file("extdata/groupComparisonModel.csv", package = "MSstatsBioNet") ) - local_mocked_bindings(.callIndraCogexApi = function(x,y) { + local_mocked_bindings(.callIndraCogexApi = function(ns, ids, fio) { return(readRDS(system.file("extdata/indraResponse.rds", package = "MSstatsBioNet"))) }) suppressWarnings(subnetwork <- getSubnetworkFromIndra(input, statement_types = c("Activation", "Phosphorylation"))) @@ -14,7 +14,7 @@ test_that("getSubnetworkFromIndra with different statement type works correctly" input <- data.table::fread( system.file("extdata/groupComparisonModel.csv", package = "MSstatsBioNet") ) - local_mocked_bindings(.callIndraCogexApi = function(x,y) { + local_mocked_bindings(.callIndraCogexApi = function(ns, ids, fio) { 
return(readRDS(system.file("extdata/indraResponse.rds", package = "MSstatsBioNet"))) }) suppressWarnings( @@ -27,7 +27,8 @@ test_that("getSubnetworkFromIndra with different statement type works correctly" test_that("Exception is thrown for 400+ proteins in dataframe", { input_400 <- data.frame( Protein = paste0("Protein", 1:400), - HgncId = paste0("HGNCID", 1:400), + EntityNamespace = rep("HGNC", 400), + EntityId = paste0("HGNCID", 1:400), issue = NA, adj.pvalue = 0.05 ) @@ -38,13 +39,13 @@ test_that("Exception is thrown for 400+ proteins in dataframe", { }) test_that("Exception is thrown for missing columns in input", { - input_missing_hgnc_id <- data.frame( + input_missing_entity_id <- data.frame( Protein = paste0("Protein", 1:10), issue = NA, adj.pvalue = 0.05 ) expect_error( - getSubnetworkFromIndra(input_missing_hgnc_id), - "Invalid Input Error: Input must contain a column named 'HgncId'." + getSubnetworkFromIndra(input_missing_entity_id), + "Invalid Input Error: Input must contain columns named 'EntityId' and 'EntityNamespace'." ) }) diff --git a/tests/testthat/test-multi-grounding.R b/tests/testthat/test-multi-grounding.R new file mode 100644 index 0000000..9ea8b3c --- /dev/null +++ b/tests/testthat/test-multi-grounding.R @@ -0,0 +1,226 @@ +# Tests for the multi-grounding fan-out, membership round-trip, and the +# post-split < 400 guard introduced for the "Compound" / Entity* contract. 
+ +pair_str <- function(p) paste(p[[1]], p[[2]], sep = ":") + +# ----- .buildCogexGroundings fan-out ----- + +test_that(".buildCogexGroundings fans out semicolon-joined (ns, id) pairs", { + pairs <- MSstatsBioNet:::.buildCogexGroundings( + namespaces = c("HGNC;CHEBI", "HGNC"), + ids = c("3815;17234", "1097"), + force_include_other = NULL + ) + expect_setequal(vapply(pairs, pair_str, character(1)), + c("HGNC:3815", "CHEBI:17234", "HGNC:1097")) +}) + +test_that(".buildCogexGroundings appends force_include_other groundings", { + pairs <- MSstatsBioNet:::.buildCogexGroundings( + namespaces = "HGNC", + ids = "1097", + force_include_other = c("HGNC:9999", "CHEBI:4911") + ) + pair_strings <- vapply(pairs, pair_str, character(1)) + expect_true("HGNC:1097" %in% pair_strings) + expect_true("HGNC:9999" %in% pair_strings) + expect_true("CHEBI:4911" %in% pair_strings) +}) + +test_that(".buildCogexGroundings deduplicates repeated pairs", { + pairs <- MSstatsBioNet:::.buildCogexGroundings( + namespaces = c("HGNC", "HGNC"), + ids = c("1097", "1097"), + force_include_other = NULL + ) + expect_equal(length(pairs), 1) +}) + +test_that(".buildCogexGroundings errors on mismatched per-row ns/id lengths", { + expect_error( + MSstatsBioNet:::.buildCogexGroundings( + namespaces = "HGNC;CHEBI", + ids = "1097", + force_include_other = NULL + ), + "positionally aligned" + ) +}) + +test_that(".buildCogexGroundings errors on bad force_include_other format", { + expect_error( + MSstatsBioNet:::.buildCogexGroundings( + namespaces = "HGNC", + ids = "1097", + force_include_other = "no_colon_here" + ), + "Invalid identifier format" + ) +}) + +# ----- .rowMatchesEndpoint + .addAdditionalMetadataToIndraEdge membership round-trip ----- + +test_that(".rowMatchesEndpoint matches a (ns, id) endpoint via membership in ;-split EntityId", { + input <- data.frame( + Protein = c("FOO", "BAR"), + EntityNamespace = c("HGNC;CHEBI", "HGNC"), + EntityId = c("3815;17234", "1097"), + stringsAsFactors = FALSE + ) + + 
expect_equal(MSstatsBioNet:::.rowMatchesEndpoint(input, "CHEBI", "17234"), + c(TRUE, FALSE)) + expect_equal(MSstatsBioNet:::.rowMatchesEndpoint(input, "HGNC", "1097"), + c(FALSE, TRUE)) + # The id 17234 appears in FOO but only under namespace CHEBI, so a HGNC:17234 + # query must NOT match — namespace-awareness is the whole point. + expect_equal(MSstatsBioNet:::.rowMatchesEndpoint(input, "HGNC", "17234"), + c(FALSE, FALSE)) +}) + +test_that(".addAdditionalMetadataToIndraEdge recovers original Protein from a multi-grounded endpoint", { + input <- data.frame( + Protein = c("FOO", "BAR"), + EntityNamespace = c("HGNC;CHEBI", "HGNC"), + EntityId = c("3815;17234", "1097"), + stringsAsFactors = FALSE + ) + edge <- list( + source_id = "17234", source_ns = "CHEBI", source_name = "glucose", + target_id = "1097", target_ns = "HGNC", target_name = "A1BG" + ) + out <- MSstatsBioNet:::.addAdditionalMetadataToIndraEdge(edge, input) + expect_equal(out$source_uniprot_id, "FOO") # not "17234" or "glucose" + expect_equal(out$target_uniprot_id, "BAR") # not "1097" or "A1BG" +}) + +# ----- .constructNodesDataFrame carries entityName + entityId ----- + +test_that(".constructNodesDataFrame emits id, entityName, entityId, Site, logFC, adj.pvalue", { + input <- data.frame( + Protein = c("FOO", "BAR"), + EntityNamespace = c("HGNC;CHEBI", "HGNC"), + EntityId = c("3815;17234", "1097"), + EntityName = c("KIT;glucose", "A1BG"), + Site = c(NA_character_, NA_character_), + log2FC = c(1.5, -0.8), + adj.pvalue = c(0.01, 0.04), + stringsAsFactors = FALSE + ) + edges <- data.frame(source = c("FOO"), target = c("BAR"), + stringsAsFactors = FALSE) + nodes <- MSstatsBioNet:::.constructNodesDataFrame(input, edges) + expect_equal(colnames(nodes), + c("id", "entityName", "entityId", "Site", "logFC", "adj.pvalue")) + expect_equal(nodes$entityName[nodes$id == "FOO"], "KIT;glucose") + expect_equal(nodes$entityId[nodes$id == "FOO"], "3815;17234") + expect_equal(nodes$entityName[nodes$id == "BAR"], "A1BG") +}) 
+ +# ----- < 400 guard counts post-split unique pairs ----- + +test_that(".validateGetSubnetworkFromIndraInput counts unique (ns, id) pairs AFTER ;-splitting", { + # 200 rows × 2 pairs each = 400 unique pairs → fails the < 400 guard + input_over <- data.frame( + Protein = paste0("P", 1:200), + EntityNamespace = rep("HGNC;CHEBI", 200), + EntityId = paste0(1:200, ";C", 1:200), + log2FC = rep(1.0, 200), + adj.pvalue = rep(0.01, 200), + stringsAsFactors = FALSE + ) + expect_error( + MSstatsBioNet:::.validateGetSubnetworkFromIndraInput( + input_over, protein_level_data = NULL, + sources_filter = NULL, force_include_other = NULL + ), + "less than 400 proteins" + ) + + # 200 rows × 1 pair each = 200 unique pairs → passes + input_under <- data.frame( + Protein = paste0("P", 1:200), + EntityNamespace = rep("HGNC", 200), + EntityId = as.character(1:200), + log2FC = rep(1.0, 200), + adj.pvalue = rep(0.01, 200), + stringsAsFactors = FALSE + ) + expect_silent( + MSstatsBioNet:::.validateGetSubnetworkFromIndraInput( + input_under, protein_level_data = NULL, + sources_filter = NULL, force_include_other = NULL + ) + ) +}) + +# ----- Compound proteinIdType unit test (mocked Gilda) ----- + +test_that("annotateProteinInfoFromIndra with Compound mocks Gilda and skips gene-only flags", { + df <- data.frame(Protein = c("glucose", "FOO")) + local_mocked_bindings( + .callGroundEntitiesFromGildaApi = function(textInputs, keep_only = NULL) { + list( + glucose = list(ns = "CHEBI", + id = "17234", + name = "glucose"), + FOO = list(ns = c("MESH", "CHEBI"), + id = c("3815", "17234"), + name = c("KIT", "glucose")) + ) + } + ) + annotated_df <- annotateProteinInfoFromIndra(df, "Compound") + + expect_true(all(c("EntityNamespace", "EntityId", "EntityName") %in% colnames(annotated_df))) + + # UniprotId and gene-only flags must be NA for Compound (no API calls) + expect_true(all(is.na(annotated_df$UniprotId))) + expect_true(all(is.na(annotated_df$IsTranscriptionFactor))) + 
expect_true(all(is.na(annotated_df$IsKinase))) + expect_true(all(is.na(annotated_df$IsPhosphatase))) + + glucose_row <- annotated_df[annotated_df$Protein == "glucose", ] + expect_equal(glucose_row$EntityNamespace, "CHEBI") + expect_equal(glucose_row$EntityId, "17234") + expect_equal(glucose_row$EntityName, "glucose") + + # Multi-grounded row — three Entity* columns are semicolon-joined and aligned + foo_row <- annotated_df[annotated_df$Protein == "FOO", ] + expect_equal(foo_row$EntityNamespace, "MESH;CHEBI") + expect_equal(foo_row$EntityId, "3815;17234") + expect_equal(foo_row$EntityName, "KIT;glucose") +}) + +# ----- Compound E2E test (mocked end-to-end; skipped if real fixture absent) ----- + +test_that("annotateProteinInfoFromIndra(Compound) -> getSubnetworkFromIndra E2E (mocked, real fixture)", { + fixture_path <- system.file("extdata/groupComparisonModel_compound.csv", + package = "MSstatsBioNet") + skip_if_not(nzchar(fixture_path) && file.exists(fixture_path), + "Compound fixture not yet provided (see TODO-MSBio-20260528).") + + df <- data.table::fread(fixture_path) + + local_mocked_bindings( + .callGroundEntitiesFromGildaApi = function(textInputs, keep_only = NULL) { + result <- list() + for (i in seq_along(textInputs)) { + text_i <- as.character(textInputs[[i]]) + result[[text_i]] <- list( + ns = "CHEBI", + id = as.character(17000 + i), + name = paste0("compound_", i) + ) + } + result + }, + .callIndraCogexApi = function(ns, ids, fio) list() + ) + + annotated <- annotateProteinInfoFromIndra(df, "Compound") + expect_true(all(c("EntityNamespace", "EntityId", "EntityName") %in% colnames(annotated))) + expect_true(any(grepl("CHEBI", annotated$EntityNamespace))) + expect_true(all(is.na(annotated$UniprotId))) + expect_true(all(is.na(annotated$IsTranscriptionFactor))) +}) diff --git a/tests/testthat/test-utils_annotateProteinInfoFromIndra.R.R b/tests/testthat/test-utils_annotateProteinInfoFromIndra.R.R index 53f0fe9..dbf6dd5 100644 --- 
a/tests/testthat/test-utils_annotateProteinInfoFromIndra.R.R +++ b/tests/testthat/test-utils_annotateProteinInfoFromIndra.R.R @@ -76,12 +76,38 @@ test_that(".callIsTranscriptionFactorApi works correctly", { expect_equal(result, expected_value) }) -test_that(".callGetHgncIdsFromGildaApi works correctly", { - hgnc_names <- list("EGFR", "CHEK1") - result <- .callGetHgncIdsFromGildaApi(hgnc_names) - expect_type(result, "character") +test_that(".callGroundEntitiesFromGildaApi returns aligned (ns, id, name) per input (live)", { + text_inputs <- list("EGFR", "CHEK1") + result <- .callGroundEntitiesFromGildaApi(text_inputs, keep_only = "HGNC") + expect_type(result, "list") expect_true(length(result) == 2) - expected_value <- c("EGFR" = "3236", "CHEK1" = "1925") - expect_equal(result, expected_value) + expect_setequal(names(result), c("EGFR", "CHEK1")) + for (input_text in names(result)) { + g <- result[[input_text]] + expect_true(all(c("ns", "id", "name") %in% names(g))) + expect_equal(length(g$ns), length(g$id)) + expect_equal(length(g$ns), length(g$name)) + expect_true(all(g$ns == "HGNC")) + } + expect_true("3236" %in% result[["EGFR"]]$id) + expect_true("1925" %in% result[["CHEK1"]]$id) +}) + +test_that(".callGroundEntitiesFromGildaApi keeps non-HGNC namespaces when keep_only is NULL (mocked)", { + text_inputs <- list("EGFR", "glucose") + local_mocked_bindings(.callGroundEntitiesFromGildaApi = function(textInputs, keep_only = NULL) { + list( + EGFR = list(ns = "HGNC", id = "3236", name = "EGFR"), + glucose = list(ns = c("MESH", "CHEBI"), + id = c("3815", "17234"), + name = c("KIT", "glucose")) + ) + }) + result <- .callGroundEntitiesFromGildaApi(text_inputs) + expect_setequal(names(result), c("EGFR", "glucose")) + expect_equal(result[["EGFR"]]$ns, "HGNC") + expect_equal(result[["glucose"]]$ns, c("MESH", "CHEBI")) + expect_equal(result[["glucose"]]$id, c("3815", "17234")) + expect_equal(result[["glucose"]]$name, c("KIT", "glucose")) }) diff --git 
a/tests/testthat/test-utils_cytoscapeNetwork.R b/tests/testthat/test-utils_cytoscapeNetwork.R index 780c611..f2118dc 100644 --- a/tests/testthat/test-utils_cytoscapeNetwork.R +++ b/tests/testthat/test-utils_cytoscapeNetwork.R @@ -7,7 +7,7 @@ create_mock_nodes <- function() { id = c("P53_HUMAN", "MDM2_HUMAN", "ATM_HUMAN", "BRCA1_HUMAN"), logFC = c(2.5, -1.8, 1.2, -2.1), pvalue = c(0.001, 0.02, 0.03, 0.005), - hgncName = c("TP53", "MDM2", "ATM", "BRCA1"), + entityName = c("TP53", "MDM2", "ATM", "BRCA1"), stringsAsFactors = FALSE ) } @@ -16,7 +16,7 @@ create_mock_nodes_ptm <- function() { data.frame( id = c("P53_HUMAN", "MDM2_HUMAN"), logFC = c(2.5, -1.8), - hgncName = c("TP53", "MDM2"), + entityName = c("TP53", "MDM2"), Site = c(NA, "S15_S20"), stringsAsFactors = FALSE ) @@ -236,9 +236,9 @@ test_that(".buildElements creates PTM child nodes and attachment edges", { expect_true("ptm_attachment" %in% edge_types) }) -test_that(".buildElements uses hgncName label when requested", { +test_that(".buildElements uses entityName label when requested", { nodes <- create_mock_nodes() - result <- MSstatsBioNet:::.buildElements(nodes, data.frame(), "hgncName") + result <- MSstatsBioNet:::.buildElements(nodes, data.frame(), "entityName") protein_nodes <- Filter(function(el) !is.null(el$data$node_type) && el$data$node_type == "protein", result) @@ -246,10 +246,10 @@ test_that(".buildElements uses hgncName label when requested", { expect_true(all(labels %in% c("TP53", "MDM2", "ATM", "BRCA1"))) }) -test_that(".buildElements falls back to id when hgncName is NA", { +test_that(".buildElements falls back to id when entityName is NA", { nodes <- create_mock_nodes() - nodes$hgncName <- NA - result <- MSstatsBioNet:::.buildElements(nodes, data.frame(), "hgncName") + nodes$entityName <- NA + result <- MSstatsBioNet:::.buildElements(nodes, data.frame(), "entityName") protein_nodes <- Filter(function(el) !is.null(el$data$node_type) && el$data$node_type == "protein", result) diff --git 
a/tests/testthat/test-utils_getSubnetworkFromIndra.R b/tests/testthat/test-utils_getSubnetworkFromIndra.R index a3e15b9..696d4d3 100644 --- a/tests/testthat/test-utils_getSubnetworkFromIndra.R +++ b/tests/testthat/test-utils_getSubnetworkFromIndra.R @@ -181,7 +181,9 @@ describe(".filterGetSubnetworkFromIndraInput", { }) test_that(".filterGetSubnetworkFromIndraInput respects force_include_other", { - input <- cbind(.make_test_input(), HgncId = c("1", "2", "3", "4")) + input <- cbind(.make_test_input(), + EntityNamespace = rep("HGNC", 4), + EntityId = c("1", "2", "3", "4")) result <- MSstatsBioNet:::.filterGetSubnetworkFromIndraInput( input, pvalueCutoff = 0.001, logfc_cutoff = 10, force_include_other = c("HGNC:1"), include_infinite_fc = FALSE, direction = "both" @@ -220,7 +222,8 @@ describe(".filterGetSubnetworkFromIndraInput", { adj.pvalue = c(0.01, 0.01, 0), stringsAsFactors = FALSE ), - HgncId = c("1", "2", "4") + EntityNamespace = rep("HGNC", 3), + EntityId = c("1", "2", "4") ) result <- MSstatsBioNet:::.filterGetSubnetworkFromIndraInput( input, pvalueCutoff = 0.05, logfc_cutoff = NULL, diff --git a/vignettes/Cytoscape-Visualization.Rmd b/vignettes/Cytoscape-Visualization.Rmd index 2f0e128..2b3c654 100644 --- a/vignettes/Cytoscape-Visualization.Rmd +++ b/vignettes/Cytoscape-Visualization.Rmd @@ -69,16 +69,16 @@ edges_ptm <- data.frame( cytoscapeNetwork(nodes_ptm, edges_ptm, nodeFontSize = 14) -# ── Example 4 · HGNC labels + left-to-right layout ───────────────────────── +# ── Example 4 · Entity labels + left-to-right layout ─────────────────────── -nodes_hgnc <- data.frame( - id = c("ENSG001", "ENSG002", "ENSG003"), - hgncName = c("TP53", "MDM2", "CDKN1A"), - logFC = c( 1.0, -0.5, 2.0), +nodes_entity <- data.frame( + id = c("ENSG001", "ENSG002", "ENSG003"), + entityName = c("TP53", "MDM2", "CDKN1A"), + logFC = c( 1.0, -0.5, 2.0), stringsAsFactors = FALSE ) -edges_hgnc <- data.frame( +edges_entity <- data.frame( source = c("ENSG001", "ENSG001"), target = 
c("ENSG002", "ENSG003"), interaction = c("Activation", "IncreaseAmount"), @@ -86,8 +86,8 @@ edges_hgnc <- data.frame( ) cytoscapeNetwork( - nodes_hgnc, edges_hgnc, - displayLabelType = "hgncName", + nodes_entity, edges_entity, + displayLabelType = "entityName", layoutOptions = list(rankDir = "LR", rankSep = 120) ) diff --git a/vignettes/MSstatsBioNet.Rmd b/vignettes/MSstatsBioNet.Rmd index d3fd498..57ba0bc 100644 --- a/vignettes/MSstatsBioNet.Rmd +++ b/vignettes/MSstatsBioNet.Rmd @@ -125,7 +125,7 @@ The function `previewNetworkInBrowser` then takes the output of an internet browser to view the subnetwork ```{r} -previewNetworkInBrowser(subnetwork$nodes, subnetwork$edges, displayLabelType = "hgncName") +previewNetworkInBrowser(subnetwork$nodes, subnetwork$edges, displayLabelType = "entityName") ``` In the network diagram displayed using CytoscapeJS, you should see three arrows diff --git a/vignettes/PTM-Analysis.Rmd b/vignettes/PTM-Analysis.Rmd index 10d43ca..3c76ffd 100644 --- a/vignettes/PTM-Analysis.Rmd +++ b/vignettes/PTM-Analysis.Rmd @@ -67,7 +67,7 @@ head(subnetwork$edges) Visualize the subnetwork on your browser ```{r} -previewNetworkInBrowser(subnetwork$nodes, subnetwork$edges, displayLabelType = "hgncName") +previewNetworkInBrowser(subnetwork$nodes, subnetwork$edges, displayLabelType = "entityName") ``` # Session info