ELELAB · lauragjensen · May 13, 2026 · May 13, 2026
diff --git a/dot_plot/README.md b/dot_plot/README.md
@@ -1,5 +1,5 @@
 # MAVISp dot plot
-*Last updated*: 12/02/26
+*Last updated*: 13/05/26
 
 ## Description
 
@@ -27,14 +27,15 @@ It produces:
 ```
 python dot_plot.py [-h] -i INPUT [-v CLINVAR_DICTIONARY] [-o OUTPUT]
                    [-m MUTATIONS [MUTATIONS ...]] [-r RESIDUES [RESIDUES ...]]
-                   [-R REVEL_THRESHOLD] [-D DEMASK_THRESHOLD]
-                   [-G GEMME_THRESHOLD] [-x X_LIM] [-f FIGSIZE FIGSIZE]
-                   [-pltR] [-pltD]
+                   [-R REVEL_THRESHOLD] [-P POPEVE_THRESHOLD]
+                   [-D DEMASK_THRESHOLD] [-G GEMME_THRESHOLD]
+                   [-x X_LIM] [-f FIGSIZE FIGSIZE]
+                   [-pltR] [-pltP] [-pltD]
                    [-pltC {all,uncertain,benign,likely_benign,pathogenic,likely_pathogenic,conflicting}
                           [{all,uncertain,benign,likely_benign,pathogenic,likely_pathogenic,conflicting} ...]]
                    [-colC]
                    [-pltS {saturation,cosmic,cbioportal} [{saturation,cosmic,cbioportal} ...]]
-                   [-vep {none,alphamissense,revel,gemme,eve}]
+                   [-vep {none,alphamissense,revel,gemme,eve,popeve}]
                    [-lgof]
 ```
 
@@ -44,16 +45,18 @@ python dot_plot.py [-h] -i INPUT [-v CLINVAR_DICTIONARY] [-o OUTPUT]
 - `-m/--mutations`: comma-separated mutations to display (e.g. `A4G,F55K`). Mutually exclusive with `-r`.
 - `-r/--residues`: comma-separated residue positions to display (e.g. `4,55`). Mutually exclusive with `-m`.
 - `-R/--revel_threshold`: REVEL pathogenic threshold (default: `0.5`).
+- `-P/--popeve_threshold`: popEVE deleterious threshold; variants with a popEVE score below this value are classified as damaging (default: `-4.617`).
 - `-D/--demask_threshold`: DeMaSk delta-fitness threshold for LoF/GoF calls (default: `0.25`).
 - `-G/--gemme_threshold`: GEMME threshold (default: `-3.0`).
 - `-x/--x_lim`: number of mutations per panel before splitting across multiple figures (default: `50`).
 - `-f/--figsize`: figure width and height (default: `14 5`). The default works well for ~50 mutations and 7–8 labels.
 - `-pltR/--plot_Revel`: add REVEL classifications to the dot plot.
+- `-pltP/--plot_popEVE`: add popEVE classifications to the dot plot.
 - `-pltD/--plot_Demask`: add DeMaSk predicted consequence (LoF/GoF) for mutations meeting the `-D` threshold.
 - `-pltC/--plot_Clinvar`: filter to specific ClinVar categories (e.g. `pathogenic uncertain`). Requires `dictionary.csv`.
 - `-colC/--color_Clinvar`: colour the x-axis labels according to ClinVar categories. Requires `dictionary.csv`.
 - `-pltS/--plot_Source`: filter mutations by source (`saturation`, `cosmic`, `cbioportal`). Multiple sources can be provided; filters are additive with `-pltC`.
-- `-vep/--vep-filter`: restrict `mechanistic_indicators_out.csv` to mutations predicted as pathogenic by the selected VEP. Choices are `alphamissense`, `revel`, `gemme`, `eve`, or `none` (default). Supplying `-vep` without an argument defaults to `alphamissense`.
+- `-vep/--vep-filter`: restrict `mechanistic_indicators_out.csv` to mutations predicted as pathogenic by the selected VEP. Choices are `alphamissense`, `revel`, `gemme`, `eve`, `popeve`, or `none` (default). Supplying `-vep` without an argument defaults to `alphamissense`.
 - `-lgof/--vep-filter-lgof`: when set, only keep entries classified as DeMaSk LoF or GoF in `mechanistic_indicators_out.csv`. By default, this filtering is not performed.
 
 ## MAVISp dot plot v2:  
@@ -86,7 +89,7 @@ See the `examples` directory and the accompanying `do.sh` scripts for a minimal
 
 Running the script produces:
 - `dot_plot.pdf` (and additional numbered PDFs if more mutations exceed `-x`). PNGs are also written when any of `-m`, `-r`, `-pltS`, or `-pltC` is used.
-- `log.txt`, summarising how many variants satisfy each classifier (REVEL, GEMME, DeMaSk, EVE, AlphaMissense) and providing module-level counts.
+- `log.txt`, summarising how many variants satisfy each classifier (REVEL, GEMME, DeMaSk, EVE, AlphaMissense, popEVE) and providing module-level counts.
 - `mechanistic_indicators_out.csv`, containing the filtered subset of mutations with at least one module effect and the consolidated `MAVISp Effects` column. This file honours all filters applied through `-m`, `-r`, `-pltS`, `-pltC`, `-vep`, and `-lgof`.
 
 Notes:

diff --git a/dot_plot/dot_plot_v2.py b/dot_plot/dot_plot_v2.py
@@ -285,8 +285,8 @@ def get_clinvar_columns(df, clinvar_class_type: str):
     raise ValueError(f"ClinVar mode '{clinvar_class_type}' requested, but the corresponding columns are missing.")
 
 
-def process_input(full_df, r_cutoff, d_cutoff, g_cutoff, residues, mutations,
-    clinvar_dict, plot_Revel, plot_Demask, plot_Source, plot_Clinvar, color_Clinvar, clinvar_cols):
+def process_input(full_df, r_cutoff, p_cutoff, d_cutoff, g_cutoff, residues, mutations,
+    clinvar_dict, plot_Revel, plot_popEVE, plot_Demask, plot_Source, plot_Clinvar, color_Clinvar, clinvar_cols):
     ''' Read MAVISp aggregated table.
 
     The function takes as input a MAVISp csv file and returns
@@ -351,6 +351,7 @@ def process_input(full_df, r_cutoff, d_cutoff, g_cutoff, residues, mutations,
                     'AlloSigMA 2 predicted consequence - pockets and interfaces' in x or \
                     ('AlloSigMA2-PSN classification' in x and not 'AlloSigMA 2 mutation type' in x) or\
                     'PTM effect in ' in x or 'REVEL score' in x or \
+                    'popEVE score' in x or \
                     'EVE classification (25% Uncertain)' in x or \
                     'DeMaSk delta fitness' in x or \
                     'DeMaSk predicted consequence' in x or \
@@ -378,12 +379,25 @@ def process_input(full_df, r_cutoff, d_cutoff, g_cutoff, residues, mutations,
     for d in [df, full_df]:
         d['REVEL score'] = d['REVEL score'].apply(convert_to_float)
 
+        # Convert popEVE score column to numeric values.
+        # Invalid/non-numeric values are converted to NaN.
+        if 'popEVE score' in d.columns:
+            d['popEVE score'] = pd.to_numeric(d['popEVE score'], errors='coerce')
+
 
     # Add REVEL score interpretation column
     df['REVEL'] = np.where(df['REVEL score'].isna(), None,
                                 np.where(df['REVEL score'] >= r_cutoff,
                                     'Damaging', 'Neutral'))
 
+    try:
+        # Add popEVE score interpretation column
+        df['popEVE'] = np.where(df['popEVE score'].isna(), None,
+                                np.where(df['popEVE score'] < p_cutoff,
+                                         'Damaging', 'Neutral'))
+    except:
+        log.warning(f'- no popEVE found in MAVISp csv.')
+
     try:
         # Convert GEMME score into absolute value
         df['GEMME predicted consequence'] = np.where(
@@ -400,12 +414,17 @@ def process_input(full_df, r_cutoff, d_cutoff, g_cutoff, residues, mutations,
                 'Neutral')))
 
     # Drop score columns
+    score_cols_to_drop = ['REVEL score',
+                        'DeMaSk delta fitness']
+
     if 'GEMME Score' in df.columns:
-        df.drop(columns = ['REVEL score','DeMaSk delta fitness', 'GEMME Score'],
-            inplace = True)
-    else:
-        df.drop(columns = ['REVEL score','DeMaSk delta fitness'],
-            inplace = True)
+        score_cols_to_drop.append('GEMME Score')
+
+    if 'popEVE score' in df.columns:
+        score_cols_to_drop.append('popEVE score')
+
+    df.drop(columns=score_cols_to_drop, inplace=True)
+
 
     # Sort columns based on broad effect categories
     functional_cols = [col for col in df.columns if 'functional' in col.lower() and 'experimental data classification' not in col.lower()]
@@ -468,7 +487,8 @@ def process_input(full_df, r_cutoff, d_cutoff, g_cutoff, residues, mutations,
         'AlphaMissense classification',
         'EVE classification (25% Uncertain)',
         'GEMME predicted consequence',
-        'REVEL'
+        'REVEL',
+        'popEVE'
     ]
     demask_pred_col = 'DeMaSk predicted consequence'
     experimental_present = [col for col in experimental_cols if col in df.columns]
@@ -669,6 +689,9 @@ def process_input(full_df, r_cutoff, d_cutoff, g_cutoff, residues, mutations,
     if not plot_Revel and 'REVEL' in plot_df.columns:
         plot_df = plot_df.drop(columns=['REVEL'])
 
+    if not plot_popEVE and 'popEVE' in plot_df.columns:
+        plot_df = plot_df.drop(columns=['popEVE'])
+
     if not plot_Demask and 'DeMaSk predicted consequence' in plot_df.columns:
         plot_df = plot_df.drop(columns=['DeMaSk predicted consequence'])
 
@@ -882,7 +905,7 @@ def plot(df, full_df, width, height, xlim, clinvar_flag, clinvar_class_type, cli
 
     return figures
 
-def generate_summary(data,d_cutoff,r_cutoff, clinvar_cols):
+def generate_summary(data,d_cutoff,r_cutoff, p_cutoff, clinvar_cols):
     ''' Summary log.txt file.
 
     The function is aimed at summarizing the number of mutations
@@ -1566,6 +1589,18 @@ def generate_summary(data,d_cutoff,r_cutoff, clinvar_cols):
         f'which could be of interest for further investigation:\n'
     out += f'-- {revel_d}\n'
 
+    # popEVE score < -4.617 (default)
+    try:
+        popeve_d = data_d.index[data_d['popEVE score'] < p_cutoff].to_list()
+
+        out += f'- We aggregated all the variants that have at least one of the MAVISp modules ' \
+               f'with a predicted damaging effect (except for PTM.function) and retained only ' \
+               f'the ones with a popEVE score < {p_cutoff} for a total of {len(popeve_d)} variants ' \
+               f'which could be of interest for further investigation:\n'
+        out += f'-- {popeve_d}\n'
+    except KeyError:
+        out += '\n- popEVE score not available.\n'
+
     # Demask
     demask_d = data_d.index[(data_d['DeMaSk delta fitness'] >= d_cutoff) | (data_d['DeMaSk delta fitness'] <= -d_cutoff)].to_list()
     out += f'- We aggregated all the variants that have at least one of the MAVISp modules ' \
@@ -1900,6 +1935,8 @@ def filter_vep_summary(summary, df, vep_filter, glof_filter):
         filtered_index_vep = df[df['REVEL'] == 1].index
     elif vep_filter == 'eve':
         filtered_index_vep = df[df['EVE classification (25% Uncertain)'] == 1].index
+    elif vep_filter == 'popeve':
+        filtered_index_vep = df[df['popEVE'] == 1].index
     elif vep_filter == 'none':
         filtered_index_vep = df.index
 
@@ -1981,6 +2018,14 @@ def main():
 	                    type = float,
                         help = R_helpstr)
 
+    P_default = -4.617
+    P_helpstr = f"Threshold to classify a mutation according to the " \
+                f"popEVE score. (Default = {P_default})"
+    parser.add_argument("-P", "--popeve_threshold",
+                        default=P_default,
+                        type=float,
+                        help=P_helpstr)
+
     D_default = 0.25
     D_helpstr = f"Threshold to classify a mutation according to the " \
                 f"DeMask score. (Default = {D_default})"
@@ -2021,12 +2066,18 @@ def main():
                         action = 'store_true',
                         help = pltR_helpstr)
 
+    pltP_helpstr = f"Plotting of popEVE classification. (Default = None)"
+    parser.add_argument("-pltP", "--plot_popEVE",
+                        action='store_true',
+                        help=pltP_helpstr)
+
     pltD_helpstr = f"Plotting of Demask LoF/GoF if" \
                     f" mutation is above demask threshold. " \
                     f"(Default = None)"
     parser.add_argument("-pltD", "--plot_Demask",
                         action = 'store_true',
                         help = pltD_helpstr)
+
     clinvarclasstype_helpstr = f"ClinVar classification type to use for plotting/coloring. " \
                                f"Choices:aggregated, germline, oncogenicity, clinical_impact."
     parser.add_argument("-cct", "--clinvar_class_type",
@@ -2077,10 +2128,10 @@ def main():
                         help = pltS_helpstr)
 
     AMx_helpstr = "Restrict mechanisitc indicators output to pathogenic variants. Choose the VEP to use" \
-                  "to detect pathogenic variants between none, alphamissense, revel, gemme, eve. If this option" \
+                  "to detect pathogenic variants between none, alphamissense, revel, gemme, eve, popeve. If this option" \
                   "is used without argument it will default to alphamissense"
     parser.add_argument("-vep", "--vep-filter",
-                        choices=["none", "alphamissense", "revel", "gemme", "eve"],
+                        choices=["none", "alphamissense", "revel", "gemme", "eve", "popeve"],
                         nargs="?",
                         const="alphamissense",
                         default="none",
@@ -2167,12 +2218,14 @@ def main():
     try:
         plot_df, classification_df, dataframe, clinvar_mapped_df =  process_input(full_df = full_df,
                                                           r_cutoff = args.revel_threshold,
+                                                          p_cutoff=args.popeve_threshold,
                                                           d_cutoff = args.demask_threshold,
                                                           g_cutoff= args.gemme_threshold,
                                                           residues = args.residues,
                                                           mutations = args.mutations,
                                                           clinvar_dict = clinvar_dict,
                                                           plot_Revel = args.plot_Revel,
+                                                          plot_popEVE=args.plot_popEVE,
                                                           plot_Demask = args.plot_Demask,
                                                           plot_Source = args.plot_Source,
                                                           plot_Clinvar = args.plot_Clinvar,
@@ -2192,6 +2245,7 @@ def main():
         summary, summary_df = generate_summary(data = dataframe,
                                                d_cutoff = args.demask_threshold,
                                                r_cutoff = args.revel_threshold,
+                                               p_cutoff=args.popeve_threshold,
                                                clinvar_cols = clinvar_cols)
         out.write(summary)