diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 864d864..c2141c0 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -108,3 +108,4 @@ user-study/images @StevenSong
 user-study/metadata.csv @StevenSong
 user-study/prepare_samples.ipynb @StevenSong
 user-study/results.csv @StevenSong
+user-study/user_study_analysis.ipynb @sahilsethi0105
diff --git a/user-study/user_study_analysis.ipynb b/user-study/user_study_analysis.ipynb
new file mode 100644
index 0000000..21d249f
--- /dev/null
+++ b/user-study/user_study_analysis.ipynb
@@ -0,0 +1,2176 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "a8401e54",
+   "metadata": {},
+   "source": [
+    "# ProtoSSL user study analysis\n",
+    "\n",
+    "This notebook conducts:\n",
+    "\n",
+    "- **Primary analysis:** participant-level paired comparison of the proportion of responses rated as good for ProtoSSL vs ProtoECGNet, performed separately for each of the two tasks (`global` and `paired`).\n",
+    "- **Primary test:** two-sided **Wilcoxon signed-rank test** across participants.\n",
+    "- **Comparative A/B/Both/Neither question:** descriptive summaries.\n",
+    "- **Inter-rater agreement:** **Fleiss' kappa** for the binary yes/no ratings, reported overall and by label.\n",
+    "\n",
+    "**Participants** (not individual ratings) are the primary unit of inference.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "73ba5be4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from scipy.stats import wilcoxon, ttest_rel, binomtest, t as tdist\n",
+    "from statsmodels.stats.inter_rater import fleiss_kappa\n",
+    "\n",
+    "pd.set_option(\"display.max_columns\", None)\n",
+    "pd.set_option(\"display.width\", 200)\n",
+    "pd.set_option(\"display.float_format\", lambda x: f\"{x:.4f}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "d094a664",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "results shape: (7, 131)\n",
+      "metadata shape: (20, 15)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>record_id</th>\n",
+       "      <th>redcap_survey_identifier</th>\n",
+       "      <th>user_study_form_timestamp</th>\n",
+       "      <th>consent</th>\n",
+       "      <th>prototypes_quality_choices</th>\n",
+       "      <th>prototypea_quality</th>\n",
+       "      <th>prototypeb_quality</th>\n",
+       "      <th>prototypes_quality_choices_2</th>\n",
+       "      <th>explanation_a</th>\n",
+       "      <th>explanation_b</th>\n",
+       "      <th>case1_prototypes_quality_choices</th>\n",
+       "      <th>case1_prototypea_quality</th>\n",
+       "      <th>case1_prototypeb_quality</th>\n",
+       "      <th>case1_prototypes_quality_choices_2</th>\n",
+       "      <th>case1_explanation_a</th>\n",
+       "      <th>case1_explanation_b</th>\n",
+       "      <th>case2_prototypes_quality_choices</th>\n",
+       "      <th>case2_prototypea_quality</th>\n",
+       "      <th>case2_prototypeb_quality</th>\n",
+       "      <th>case2_prototypes_quality_choices_2</th>\n",
+       "      <th>case2_explanation_a</th>\n",
+       "      <th>case2_explanation_b</th>\n",
+       "      <th>case3_prototypes_quality_choices</th>\n",
+       "      <th>case3_prototypea_quality</th>\n",
+       "      <th>case3_prototypeb_quality</th>\n",
+       "      <th>case3_prototypes_quality_choices_2</th>\n",
+       "      <th>case3_explanation_a</th>\n",
+       "      <th>case3_explanation_b</th>\n",
+       "      <th>case4_prototypes_quality_choices</th>\n",
+       "      <th>case4_prototypea_quality</th>\n",
+       "      <th>case4_prototypeb_quality</th>\n",
+       "      <th>case4_prototypes_quality_choices_2</th>\n",
+       "      <th>case4_explanation_a</th>\n",
+       "      <th>case4_explanation_b</th>\n",
+       "      <th>case5_prototypes_quality_choices</th>\n",
+       "      <th>case5_prototypea_quality</th>\n",
+       "      <th>case5_prototypeb_quality</th>\n",
+       "      <th>case5_prototypes_quality_choices_2</th>\n",
+       "      <th>case5_explanation_a</th>\n",
+       "      <th>case5_explanation_b</th>\n",
+       "      <th>case6_prototypes_quality_choices</th>\n",
+       "      <th>case6_prototypea_quality</th>\n",
+       "      <th>case6_prototypeb_quality</th>\n",
+       "      <th>case6_prototypes_quality_choices_2</th>\n",
+       "      <th>case6_explanation_a</th>\n",
+       "      <th>case6_explanation_b</th>\n",
+       "      <th>case7_prototypes_quality_choices</th>\n",
+       "      <th>case7_prototypea_quality</th>\n",
+       "      <th>case7_prototypeb_quality</th>\n",
+       "      <th>case7_prototypes_quality_choices_2</th>\n",
+       "      <th>case7_explanation_a</th>\n",
+       "      <th>case7_explanation_b</th>\n",
+       "      <th>case8_prototypes_quality_choices</th>\n",
+       "      <th>case8_prototypea_quality</th>\n",
+       "      <th>case8_prototypeb_quality</th>\n",
+       "      <th>case8_prototypes_quality_choices_2</th>\n",
+       "      <th>case8_explanation_a</th>\n",
+       "      <th>case8_explanation_b</th>\n",
+       "      <th>case9_prototypes_quality_choices</th>\n",
+       "      <th>case9_prototypea_quality</th>\n",
+       "      <th>case9_prototypeb_quality</th>\n",
+       "      <th>case9_prototypes_quality_choices_2</th>\n",
+       "      <th>case9_explanation_a</th>\n",
+       "      <th>case9_explanation_b</th>\n",
+       "      <th>case10_prototypes_quality_choices</th>\n",
+       "      <th>case10_prototypea_quality</th>\n",
+       "      <th>case10_prototypeb_quality</th>\n",
+       "      <th>case10_prototypes_quality_choices_2</th>\n",
+       "      <th>case10_explanation_a</th>\n",
+       "      <th>case10_explanation_b</th>\n",
+       "      <th>case11_prototypes_quality_choices</th>\n",
+       "      <th>case11_prototypea_quality</th>\n",
+       "      <th>case11_prototypeb_quality</th>\n",
+       "      <th>case11_prototypes_quality_choices_2</th>\n",
+       "      <th>case11_explanation_a</th>\n",
+       "      <th>case11_explanation_b</th>\n",
+       "      <th>case12_prototypes_quality_choices</th>\n",
+       "      <th>case12_prototypea_quality</th>\n",
+       "      <th>case12_prototypeb_quality</th>\n",
+       "      <th>case12_prototypes_quality_choices_2</th>\n",
+       "      <th>case12_explanation_a</th>\n",
+       "      <th>case12_explanation_b</th>\n",
+       "      <th>case13_prototypes_quality_choices</th>\n",
+       "      <th>case13_prototypea_quality</th>\n",
+       "      <th>case13_prototypeb_quality</th>\n",
+       "      <th>case13_prototypes_quality_choices_2</th>\n",
+       "      <th>case13_explanation_a</th>\n",
+       "      <th>case13_explanation_b</th>\n",
+       "      <th>case14_prototypes_quality_choices</th>\n",
+       "      <th>case14_prototypea_quality</th>\n",
+       "      <th>case14_prototypeb_quality</th>\n",
+       "      <th>case14_prototypes_quality_choices_2</th>\n",
+       "      <th>case14_explanation_a</th>\n",
+       "      <th>case14_explanation_b</th>\n",
+       "      <th>case15_prototypes_quality_choices</th>\n",
+       "      <th>case15_prototypea_quality</th>\n",
+       "      <th>case15_prototypeb_quality</th>\n",
+       "      <th>case15_prototypes_quality_choices_2</th>\n",
+       "      <th>case15_explanation_a</th>\n",
+       "      <th>case15_explanation_b</th>\n",
+       "      <th>case16_prototypes_quality_choices</th>\n",
+       "      <th>case16_prototypea_quality</th>\n",
+       "      <th>case16_prototypeb_quality</th>\n",
+       "      <th>case16_prototypes_quality_choices_2</th>\n",
+       "      <th>case16_explanation_a</th>\n",
+       "      <th>case16_explanation_b</th>\n",
+       "      <th>case17_prototypes_quality_choices</th>\n",
+       "      <th>case17_prototypea_quality</th>\n",
+       "      <th>case17_prototypeb_quality</th>\n",
+       "      <th>case17_prototypes_quality_choices_2</th>\n",
+       "      <th>case17_explanation_a</th>\n",
+       "      <th>case17_explanation_b</th>\n",
+       "      <th>case18_prototypes_quality_choices</th>\n",
+       "      <th>case18_prototypea_quality</th>\n",
+       "      <th>case18_prototypeb_quality</th>\n",
+       "      <th>case18_prototypes_quality_choices_2</th>\n",
+       "      <th>case18_explanation_a</th>\n",
+       "      <th>case18_explanation_b</th>\n",
+       "      <th>case19_prototypes_quality_choices</th>\n",
+       "      <th>case19_prototypea_quality</th>\n",
+       "      <th>case19_prototypeb_quality</th>\n",
+       "      <th>case19_prototypes_quality_choices_2</th>\n",
+       "      <th>case19_explanation_a</th>\n",
+       "      <th>case19_explanation_b</th>\n",
+       "      <th>case20_prototypes_quality_choices</th>\n",
+       "      <th>case20_prototypea_quality</th>\n",
+       "      <th>case20_prototypeb_quality</th>\n",
+       "      <th>case20_prototypes_quality_choices_2</th>\n",
+       "      <th>case20_explanation_a</th>\n",
+       "      <th>case20_explanation_b</th>\n",
+       "      <th>user_study_form_complete</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>5</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2026-04-06 18:18:56</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>7</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2026-04-09 11:26:53</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   record_id  redcap_survey_identifier user_study_form_timestamp  consent  prototypes_quality_choices  prototypea_quality  prototypeb_quality  prototypes_quality_choices_2  explanation_a  \\\n",
+       "0          5                       NaN       2026-04-06 18:18:56        1                           2                   0                   1                             2              0   \n",
+       "1          7                       NaN       2026-04-09 11:26:53        1                           2                   0                   1                             2              0   \n",
+       "\n",
+       "   explanation_b  case1_prototypes_quality_choices  case1_prototypea_quality  case1_prototypeb_quality  case1_prototypes_quality_choices_2  case1_explanation_a  case1_explanation_b  \\\n",
+       "0              1                                 3                         1                         1                                   3                    1                    1   \n",
+       "1              1                                 3                         1                         1                                   1                    1                    1   \n",
+       "\n",
+       "   case2_prototypes_quality_choices  case2_prototypea_quality  case2_prototypeb_quality  case2_prototypes_quality_choices_2  case2_explanation_a  case2_explanation_b  \\\n",
+       "0                                 1                         1                         1                                   1                    1                    1   \n",
+       "1                                 3                         1                         1                                   3                    1                    1   \n",
+       "\n",
+       "   case3_prototypes_quality_choices  case3_prototypea_quality  case3_prototypeb_quality  case3_prototypes_quality_choices_2  case3_explanation_a  case3_explanation_b  \\\n",
+       "0                                 2                         1                         1                                   2                    1                    1   \n",
+       "1                                 3                         0                         0                                   2                    0                    1   \n",
+       "\n",
+       "   case4_prototypes_quality_choices  case4_prototypea_quality  case4_prototypeb_quality  case4_prototypes_quality_choices_2  case4_explanation_a  case4_explanation_b  \\\n",
+       "0                                 3                         1                         1                                   1                    1                    1   \n",
+       "1                                 1                         1                         0                                   1                    1                    0   \n",
+       "\n",
+       "   case5_prototypes_quality_choices  case5_prototypea_quality  case5_prototypeb_quality  case5_prototypes_quality_choices_2  case5_explanation_a  case5_explanation_b  \\\n",
+       "0                                 3                         1                         1                                   3                    1                    1   \n",
+       "1                                 3                         1                         1                                   3                    1                    1   \n",
+       "\n",
+       "   case6_prototypes_quality_choices  case6_prototypea_quality  case6_prototypeb_quality  case6_prototypes_quality_choices_2  case6_explanation_a  case6_explanation_b  \\\n",
+       "0                                 3                         1                         1                                   3                    1                    1   \n",
+       "1                                 1                         1                         0                                   1                    1                    0   \n",
+       "\n",
+       "   case7_prototypes_quality_choices  case7_prototypea_quality  case7_prototypeb_quality  case7_prototypes_quality_choices_2  case7_explanation_a  case7_explanation_b  \\\n",
+       "0                                 3                         1                         1                                   3                    1                    1   \n",
+       "1                                 1                         1                         1                                   1                    1                    0   \n",
+       "\n",
+       "   case8_prototypes_quality_choices  case8_prototypea_quality  case8_prototypeb_quality  case8_prototypes_quality_choices_2  case8_explanation_a  case8_explanation_b  \\\n",
+       "0                                 3                         1                         1                                   3                    1                    1   \n",
+       "1                                 3                         1                         1                                   3                    1                    1   \n",
+       "\n",
+       "   case9_prototypes_quality_choices  case9_prototypea_quality  case9_prototypeb_quality  case9_prototypes_quality_choices_2  case9_explanation_a  case9_explanation_b  \\\n",
+       "0                                 3                         1                         1                                   1                    1                    1   \n",
+       "1                                 3                         1                         1                                   1                    1                    0   \n",
+       "\n",
+       "   case10_prototypes_quality_choices  case10_prototypea_quality  case10_prototypeb_quality  case10_prototypes_quality_choices_2  case10_explanation_a  case10_explanation_b  \\\n",
+       "0                                  2                          1                          1                                    2                     1                     1   \n",
+       "1                                  3                          1                          1                                    2                     1                     0   \n",
+       "\n",
+       "   case11_prototypes_quality_choices  case11_prototypea_quality  case11_prototypeb_quality  case11_prototypes_quality_choices_2  case11_explanation_a  case11_explanation_b  \\\n",
+       "0                                  2                          1                          1                                    3                     1                     1   \n",
+       "1                                  1                          1                          0                                    1                     1                     0   \n",
+       "\n",
+       "   case12_prototypes_quality_choices  case12_prototypea_quality  case12_prototypeb_quality  case12_prototypes_quality_choices_2  case12_explanation_a  case12_explanation_b  \\\n",
+       "0                                  2                          1                          1                                    3                     1                     1   \n",
+       "1                                  2                          1                          1                                    3                     1                     1   \n",
+       "\n",
+       "   case13_prototypes_quality_choices  case13_prototypea_quality  case13_prototypeb_quality  case13_prototypes_quality_choices_2  case13_explanation_a  case13_explanation_b  \\\n",
+       "0                                  3                          1                          1                                    3                     1                     1   \n",
+       "1                                  2                          0                          1                                    3                     1                     1   \n",
+       "\n",
+       "   case14_prototypes_quality_choices  case14_prototypea_quality  case14_prototypeb_quality  case14_prototypes_quality_choices_2  case14_explanation_a  case14_explanation_b  \\\n",
+       "0                                  3                          1                          1                                    3                     1                     1   \n",
+       "1                                  3                          1                          1                                    1                     1                     1   \n",
+       "\n",
+       "   case15_prototypes_quality_choices  case15_prototypea_quality  case15_prototypeb_quality  case15_prototypes_quality_choices_2  case15_explanation_a  case15_explanation_b  \\\n",
+       "0                                  3                          1                          1                                    3                     1                     1   \n",
+       "1                                  3                          1                          1                                    2                     1                     1   \n",
+       "\n",
+       "   case16_prototypes_quality_choices  case16_prototypea_quality  case16_prototypeb_quality  case16_prototypes_quality_choices_2  case16_explanation_a  case16_explanation_b  \\\n",
+       "0                                  2                          0                          1                                    2                     0                     1   \n",
+       "1                                  2                          0                          1                                    2                     0                     1   \n",
+       "\n",
+       "   case17_prototypes_quality_choices  case17_prototypea_quality  case17_prototypeb_quality  case17_prototypes_quality_choices_2  case17_explanation_a  case17_explanation_b  \\\n",
+       "0                                  3                          1                          1                                    1                     1                     1   \n",
+       "1                                  1                          1                          0                                    1                     1                     0   \n",
+       "\n",
+       "   case18_prototypes_quality_choices  case18_prototypea_quality  case18_prototypeb_quality  case18_prototypes_quality_choices_2  case18_explanation_a  case18_explanation_b  \\\n",
+       "0                                  2                          0                          1                                    2                     0                     1   \n",
+       "1                                  2                          0                          1                                    2                     0                     1   \n",
+       "\n",
+       "   case19_prototypes_quality_choices  case19_prototypea_quality  case19_prototypeb_quality  case19_prototypes_quality_choices_2  case19_explanation_a  case19_explanation_b  \\\n",
+       "0                                  3                          1                          1                                    3                     1                     1   \n",
+       "1                                  1                          1                          1                                    1                     1                     0   \n",
+       "\n",
+       "   case20_prototypes_quality_choices  case20_prototypea_quality  case20_prototypeb_quality  case20_prototypes_quality_choices_2  case20_explanation_a  case20_explanation_b  user_study_form_complete  \n",
+       "0                                  3                          1                          1                                    2                     1                     1                         2  \n",
+       "1                                  2                          0                          1                                    2                     0                     1                         2  "
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "results = pd.read_csv(\"results.csv\")\n",
+    "metadata = pd.read_csv(\"metadata.csv\")\n",
+    "\n",
+    "print(\"results shape:\", results.shape)\n",
+    "print(\"metadata shape:\", metadata.shape)\n",
+    "results.head(2)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7f870f0f",
+   "metadata": {},
+   "source": [
+    "### Decode REDCap responses into analysis tables\n",
+    "\n",
+    "`yesno` contains one row per participant × case × task × model for the binary yes/no questions.\n",
+    "\n",
+    "`prefs` contains one row per participant × case × task for the A/B/Both/Neither comparative question, decoded back to the actual model identities using the metadata file.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "16acb6b2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "yesno shape: (560, 6)\n",
+      "prefs shape: (280, 5)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>participant</th>\n",
+       "      <th>case_id</th>\n",
+       "      <th>label</th>\n",
+       "      <th>task</th>\n",
+       "      <th>model</th>\n",
+       "      <th>good</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>AMI</td>\n",
+       "      <td>global</td>\n",
+       "      <td>ProtoSSL</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>AMI</td>\n",
+       "      <td>global</td>\n",
+       "      <td>ProtoECGNet</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>AMI</td>\n",
+       "      <td>paired</td>\n",
+       "      <td>ProtoSSL</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>AMI</td>\n",
+       "      <td>paired</td>\n",
+       "      <td>ProtoECGNet</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>2</td>\n",
+       "      <td>AMI</td>\n",
+       "      <td>global</td>\n",
+       "      <td>ProtoSSL</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   participant  case_id label    task        model  good\n",
+       "0            5        1   AMI  global     ProtoSSL     1\n",
+       "1            5        1   AMI  global  ProtoECGNet     1\n",
+       "2            5        1   AMI  paired     ProtoSSL     1\n",
+       "3            5        1   AMI  paired  ProtoECGNet     1\n",
+       "4            5        2   AMI  global     ProtoSSL     1"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Per-case key: label plus the letter (A/B) each model was displayed under.\n",
+    "case_map = (\n",
+    "    metadata.rename(\n",
+    "        columns={\n",
+    "            \"Study Index\": \"case_id\",\n",
+    "            \"Label\": \"label\",\n",
+    "            \"ProtoSSL Assignment\": \"ssl_assignment\",\n",
+    "            \"ProtoECGNet Assignment\": \"ecg_assignment\",\n",
+    "        }\n",
+    "    )[[\"case_id\", \"label\", \"ssl_assignment\", \"ecg_assignment\"]]\n",
+    "    .copy()\n",
+    ")\n",
+    "case_map[\"case_id\"] = case_map[\"case_id\"].astype(int)\n",
+    "\n",
+    "N_CASES = 20  # survey columns are named case1_* ... case20_*\n",
+    "pref_code = {1: \"A\", 2: \"B\", 3: \"Both\", 4: \"Neither\"}\n",
+    "\n",
+    "# Resolve every case's metadata once (not once per participant) and fail loudly on an\n",
+    "# unexpected assignment letter: the decoding below would otherwise silently label both\n",
+    "# prototypes of that case as ProtoECGNet.\n",
+    "case_info = {}\n",
+    "for case_id in range(1, N_CASES + 1):\n",
+    "    meta_row = case_map.loc[case_map[\"case_id\"] == case_id].iloc[0]\n",
+    "    ssl_letter = meta_row[\"ssl_assignment\"]\n",
+    "    if ssl_letter not in (\"A\", \"B\"):\n",
+    "        raise ValueError(f\"case {case_id}: unexpected ProtoSSL assignment {ssl_letter!r}\")\n",
+    "    case_info[case_id] = (meta_row[\"label\"], ssl_letter)\n",
+    "\n",
+    "yes_rows = []\n",
+    "pref_rows = []\n",
+    "\n",
+    "# Row order is participant -> case -> task -> shown letter (A before B).\n",
+    "for _, row in results.iterrows():\n",
+    "    participant = int(row[\"record_id\"])\n",
+    "    for case_id, (case_label, ssl_letter) in case_info.items():\n",
+    "        # (task name, yes/no column for A, yes/no column for B, comparative-choice column)\n",
+    "        task_specs = [\n",
+    "            (\"global\",\n",
+    "             f\"case{case_id}_prototypea_quality\",\n",
+    "             f\"case{case_id}_prototypeb_quality\",\n",
+    "             f\"case{case_id}_prototypes_quality_choices\"),\n",
+    "            (\"paired\",\n",
+    "             f\"case{case_id}_explanation_a\",\n",
+    "             f\"case{case_id}_explanation_b\",\n",
+    "             f\"case{case_id}_prototypes_quality_choices_2\"),\n",
+    "        ]\n",
+    "\n",
+    "        for task, a_col, b_col, pref_col in task_specs:\n",
+    "            for shown_letter, col in [(\"A\", a_col), (\"B\", b_col)]:\n",
+    "                yes_rows.append(\n",
+    "                    {\n",
+    "                        \"participant\": participant,\n",
+    "                        \"case_id\": case_id,\n",
+    "                        \"label\": case_label,\n",
+    "                        \"task\": task,\n",
+    "                        \"model\": \"ProtoSSL\" if shown_letter == ssl_letter else \"ProtoECGNet\",\n",
+    "                        \"good\": int(row[col]),\n",
+    "                    }\n",
+    "                )\n",
+    "\n",
+    "            # Codes 1/2 name a displayed letter and are mapped back to the model;\n",
+    "            # Both/Neither pass through unchanged.\n",
+    "            pref_value = pref_code[int(row[pref_col])]\n",
+    "            if pref_value in (\"A\", \"B\"):\n",
+    "                actual_pref = \"ProtoSSL\" if pref_value == ssl_letter else \"ProtoECGNet\"\n",
+    "            else:\n",
+    "                actual_pref = pref_value\n",
+    "\n",
+    "            pref_rows.append(\n",
+    "                {\n",
+    "                    \"participant\": participant,\n",
+    "                    \"case_id\": case_id,\n",
+    "                    \"label\": case_label,\n",
+    "                    \"task\": task,\n",
+    "                    \"preference\": actual_pref,\n",
+    "                }\n",
+    "            )\n",
+    "\n",
+    "yesno = pd.DataFrame(yes_rows)\n",
+    "prefs = pd.DataFrame(pref_rows)\n",
+    "\n",
+    "print(\"yesno shape:\", yesno.shape)\n",
+    "print(\"prefs shape:\", prefs.shape)\n",
+    "yesno.head()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0dce4296",
+   "metadata": {},
+   "source": [
+    "### Descriptive summaries for the binary yes/no questions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "961b8054",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>task</th>\n",
+       "      <th>model</th>\n",
+       "      <th>n_yes</th>\n",
+       "      <th>n_total</th>\n",
+       "      <th>proportion</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>global</td>\n",
+       "      <td>ProtoECGNet</td>\n",
+       "      <td>93</td>\n",
+       "      <td>140</td>\n",
+       "      <td>0.6643</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>global</td>\n",
+       "      <td>ProtoSSL</td>\n",
+       "      <td>128</td>\n",
+       "      <td>140</td>\n",
+       "      <td>0.9143</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>ProtoECGNet</td>\n",
+       "      <td>95</td>\n",
+       "      <td>140</td>\n",
+       "      <td>0.6786</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>ProtoSSL</td>\n",
+       "      <td>116</td>\n",
+       "      <td>140</td>\n",
+       "      <td>0.8286</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     task        model  n_yes  n_total  proportion\n",
+       "0  global  ProtoECGNet     93      140      0.6643\n",
+       "1  global     ProtoSSL    128      140      0.9143\n",
+       "2  paired  ProtoECGNet     95      140      0.6786\n",
+       "3  paired     ProtoSSL    116      140      0.8286"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Pooled counts and proportion of \"good\" ratings for every task/model pair.\n",
+    "overall_yesno = yesno.groupby([\"task\", \"model\"], as_index=False).agg(\n",
+    "    n_yes=(\"good\", \"sum\"),\n",
+    "    n_total=(\"good\", \"count\"),\n",
+    "    proportion=(\"good\", \"mean\"),\n",
+    ")\n",
+    "overall_yesno\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "da09502f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>task</th>\n",
+       "      <th>label</th>\n",
+       "      <th>model</th>\n",
+       "      <th>n_yes</th>\n",
+       "      <th>n_total</th>\n",
+       "      <th>proportion</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>global</td>\n",
+       "      <td>AMI</td>\n",
+       "      <td>ProtoECGNet</td>\n",
+       "      <td>23</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.6571</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>global</td>\n",
+       "      <td>AMI</td>\n",
+       "      <td>ProtoSSL</td>\n",
+       "      <td>30</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.8571</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>global</td>\n",
+       "      <td>CLBBB</td>\n",
+       "      <td>ProtoECGNet</td>\n",
+       "      <td>33</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.9429</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>global</td>\n",
+       "      <td>CLBBB</td>\n",
+       "      <td>ProtoSSL</td>\n",
+       "      <td>32</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.9143</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>global</td>\n",
+       "      <td>CRBBB</td>\n",
+       "      <td>ProtoECGNet</td>\n",
+       "      <td>19</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.5429</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>global</td>\n",
+       "      <td>CRBBB</td>\n",
+       "      <td>ProtoSSL</td>\n",
+       "      <td>32</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.9143</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>global</td>\n",
+       "      <td>PVC</td>\n",
+       "      <td>ProtoECGNet</td>\n",
+       "      <td>18</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.5143</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>global</td>\n",
+       "      <td>PVC</td>\n",
+       "      <td>ProtoSSL</td>\n",
+       "      <td>34</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.9714</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>AMI</td>\n",
+       "      <td>ProtoECGNet</td>\n",
+       "      <td>23</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.6571</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>AMI</td>\n",
+       "      <td>ProtoSSL</td>\n",
+       "      <td>28</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.8000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>CLBBB</td>\n",
+       "      <td>ProtoECGNet</td>\n",
+       "      <td>33</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.9429</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>CLBBB</td>\n",
+       "      <td>ProtoSSL</td>\n",
+       "      <td>27</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.7714</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>CRBBB</td>\n",
+       "      <td>ProtoECGNet</td>\n",
+       "      <td>19</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.5429</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>CRBBB</td>\n",
+       "      <td>ProtoSSL</td>\n",
+       "      <td>30</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.8571</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>PVC</td>\n",
+       "      <td>ProtoECGNet</td>\n",
+       "      <td>20</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.5714</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>PVC</td>\n",
+       "      <td>ProtoSSL</td>\n",
+       "      <td>31</td>\n",
+       "      <td>35</td>\n",
+       "      <td>0.8857</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      task  label        model  n_yes  n_total  proportion\n",
+       "0   global    AMI  ProtoECGNet     23       35      0.6571\n",
+       "1   global    AMI     ProtoSSL     30       35      0.8571\n",
+       "2   global  CLBBB  ProtoECGNet     33       35      0.9429\n",
+       "3   global  CLBBB     ProtoSSL     32       35      0.9143\n",
+       "4   global  CRBBB  ProtoECGNet     19       35      0.5429\n",
+       "5   global  CRBBB     ProtoSSL     32       35      0.9143\n",
+       "6   global    PVC  ProtoECGNet     18       35      0.5143\n",
+       "7   global    PVC     ProtoSSL     34       35      0.9714\n",
+       "8   paired    AMI  ProtoECGNet     23       35      0.6571\n",
+       "9   paired    AMI     ProtoSSL     28       35      0.8000\n",
+       "10  paired  CLBBB  ProtoECGNet     33       35      0.9429\n",
+       "11  paired  CLBBB     ProtoSSL     27       35      0.7714\n",
+       "12  paired  CRBBB  ProtoECGNet     19       35      0.5429\n",
+       "13  paired  CRBBB     ProtoSSL     30       35      0.8571\n",
+       "14  paired    PVC  ProtoECGNet     20       35      0.5714\n",
+       "15  paired    PVC     ProtoSSL     31       35      0.8857"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Same pooled summary as above, additionally broken down by label.\n",
+    "label_yesno = yesno.groupby([\"task\", \"label\", \"model\"], as_index=False).agg(\n",
+    "    n_yes=(\"good\", \"sum\"),\n",
+    "    n_total=(\"good\", \"count\"),\n",
+    "    proportion=(\"good\", \"mean\"),\n",
+    ")\n",
+    "label_yesno\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "03a50af7",
+   "metadata": {},
+   "source": [
+    "### Primary analysis\n",
+    "\n",
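+    "For each participant and each task, compute the proportion of responses rated as good for each model across the 20 cases. Then compare ProtoSSL vs ProtoECGNet across participants with a two-sided, exact **paired Wilcoxon signed-rank test**. The results table also reports the mean paired difference with a *t*-based 95% CI, plus paired *t*-test and sign-test p-values for reference.\n"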
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "3194ed44",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>model</th>\n",
+       "      <th>participant</th>\n",
+       "      <th>task</th>\n",
+       "      <th>ProtoECGNet</th>\n",
+       "      <th>ProtoSSL</th>\n",
+       "      <th>difference</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>5</td>\n",
+       "      <td>global</td>\n",
+       "      <td>0.9000</td>\n",
+       "      <td>1.0000</td>\n",
+       "      <td>0.1000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>5</td>\n",
+       "      <td>paired</td>\n",
+       "      <td>0.9000</td>\n",
+       "      <td>1.0000</td>\n",
+       "      <td>0.1000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>7</td>\n",
+       "      <td>global</td>\n",
+       "      <td>0.7000</td>\n",
+       "      <td>0.8000</td>\n",
+       "      <td>0.1000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>7</td>\n",
+       "      <td>paired</td>\n",
+       "      <td>0.6000</td>\n",
+       "      <td>0.8000</td>\n",
+       "      <td>0.2000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>8</td>\n",
+       "      <td>global</td>\n",
+       "      <td>0.4500</td>\n",
+       "      <td>0.8000</td>\n",
+       "      <td>0.3500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>8</td>\n",
+       "      <td>paired</td>\n",
+       "      <td>0.5000</td>\n",
+       "      <td>0.5500</td>\n",
+       "      <td>0.0500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>9</td>\n",
+       "      <td>global</td>\n",
+       "      <td>0.6000</td>\n",
+       "      <td>0.9500</td>\n",
+       "      <td>0.3500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>9</td>\n",
+       "      <td>paired</td>\n",
+       "      <td>0.5500</td>\n",
+       "      <td>0.9000</td>\n",
+       "      <td>0.3500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>10</td>\n",
+       "      <td>global</td>\n",
+       "      <td>0.6000</td>\n",
+       "      <td>0.9500</td>\n",
+       "      <td>0.3500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>10</td>\n",
+       "      <td>paired</td>\n",
+       "      <td>0.7500</td>\n",
+       "      <td>0.9500</td>\n",
+       "      <td>0.2000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>11</td>\n",
+       "      <td>global</td>\n",
+       "      <td>0.6500</td>\n",
+       "      <td>0.9500</td>\n",
+       "      <td>0.3000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>11</td>\n",
+       "      <td>paired</td>\n",
+       "      <td>0.8000</td>\n",
+       "      <td>0.9000</td>\n",
+       "      <td>0.1000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>12</td>\n",
+       "      <td>global</td>\n",
+       "      <td>0.7500</td>\n",
+       "      <td>0.9500</td>\n",
+       "      <td>0.2000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>12</td>\n",
+       "      <td>paired</td>\n",
+       "      <td>0.6500</td>\n",
+       "      <td>0.7000</td>\n",
+       "      <td>0.0500</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "model  participant    task  ProtoECGNet  ProtoSSL  difference\n",
+       "0                5  global       0.9000    1.0000      0.1000\n",
+       "1                5  paired       0.9000    1.0000      0.1000\n",
+       "2                7  global       0.7000    0.8000      0.1000\n",
+       "3                7  paired       0.6000    0.8000      0.2000\n",
+       "4                8  global       0.4500    0.8000      0.3500\n",
+       "5                8  paired       0.5000    0.5500      0.0500\n",
+       "6                9  global       0.6000    0.9500      0.3500\n",
+       "7                9  paired       0.5500    0.9000      0.3500\n",
+       "8               10  global       0.6000    0.9500      0.3500\n",
+       "9               10  paired       0.7500    0.9500      0.2000\n",
+       "10              11  global       0.6500    0.9500      0.3000\n",
+       "11              11  paired       0.8000    0.9000      0.1000\n",
+       "12              12  global       0.7500    0.9500      0.2000\n",
+       "13              12  paired       0.6500    0.7000      0.0500"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# One row per participant x task: mean \"good\" rate for each model (wide format)\n",
+    "# plus the paired difference, ProtoSSL minus ProtoECGNet.\n",
+    "participant_summary = (\n",
+    "    yesno.pivot_table(\n",
+    "        index=[\"participant\", \"task\"],\n",
+    "        columns=\"model\",\n",
+    "        values=\"good\",\n",
+    "        aggfunc=\"mean\",\n",
+    "    )\n",
+    "    .reset_index()\n",
+    "    .assign(difference=lambda wide: wide[\"ProtoSSL\"] - wide[\"ProtoECGNet\"])\n",
+    ")\n",
+    "\n",
+    "participant_summary\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "b6ff4f59",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>task</th>\n",
+       "      <th>n_participants</th>\n",
+       "      <th>ProtoSSL_mean</th>\n",
+       "      <th>ProtoECGNet_mean</th>\n",
+       "      <th>mean_difference</th>\n",
+       "      <th>ci95_low</th>\n",
+       "      <th>ci95_high</th>\n",
+       "      <th>wilcoxon_W</th>\n",
+       "      <th>wilcoxon_p</th>\n",
+       "      <th>paired_t_p</th>\n",
+       "      <th>sign_test_p</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>global</td>\n",
+       "      <td>7</td>\n",
+       "      <td>0.9143</td>\n",
+       "      <td>0.6643</td>\n",
+       "      <td>0.2500</td>\n",
+       "      <td>0.1432</td>\n",
+       "      <td>0.3568</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>0.0156</td>\n",
+       "      <td>0.0012</td>\n",
+       "      <td>0.0156</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>7</td>\n",
+       "      <td>0.8286</td>\n",
+       "      <td>0.6786</td>\n",
+       "      <td>0.1500</td>\n",
+       "      <td>0.0501</td>\n",
+       "      <td>0.2499</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>0.0156</td>\n",
+       "      <td>0.0104</td>\n",
+       "      <td>0.0156</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     task  n_participants  ProtoSSL_mean  ProtoECGNet_mean  mean_difference  ci95_low  ci95_high  wilcoxon_W  wilcoxon_p  paired_t_p  sign_test_p\n",
+       "0  global               7         0.9143            0.6643           0.2500    0.1432     0.3568      0.0000      0.0156      0.0012       0.0156\n",
+       "1  paired               7         0.8286            0.6786           0.1500    0.0501     0.2499      0.0000      0.0156      0.0104       0.0156"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "\n",
+    "def participant_level_analysis(df, tasks=(\"global\", \"paired\")):\n",
+    "    \"\"\"Compare ProtoSSL vs ProtoECGNet across participants, separately for each task.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    df : pandas.DataFrame\n",
+    "        One row per participant x task with columns ``task``, ``ProtoSSL``,\n",
+    "        ``ProtoECGNet`` and ``difference`` (expected to be ProtoSSL minus ProtoECGNet).\n",
+    "    tasks : sequence of str, optional\n",
+    "        Task names to analyse, in output order.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    pandas.DataFrame\n",
+    "        One row per task: participant count, per-model means, mean paired difference\n",
+    "        with a t-based 95% CI, the exact two-sided Wilcoxon signed-rank statistic and\n",
+    "        p-value, and the paired t-test and sign-test p-values.\n",
+    "    \"\"\"\n",
+    "    rows = []\n",
+    "\n",
+    "    for task in tasks:\n",
+    "        sub = df[df[\"task\"] == task]\n",
+    "        diffs = sub[\"difference\"].to_numpy()\n",
+    "        n_diffs = len(diffs)\n",
+    "\n",
+    "        # Primary test. With the exact method and n non-zero differences, the two-sided\n",
+    "        # p-value cannot fall below 2 / 2**n (0.0156 for n = 7).\n",
+    "        w = wilcoxon(diffs, alternative=\"two-sided\", zero_method=\"wilcox\", method=\"exact\")\n",
+    "        ttest = ttest_rel(sub[\"ProtoSSL\"], sub[\"ProtoECGNet\"])\n",
+    "        # Sign test: positive differences among the non-zero ones.\n",
+    "        sign = binomtest(\n",
+    "            int(np.sum(diffs > 0)), int(np.sum(diffs != 0)), p=0.5, alternative=\"two-sided\"\n",
+    "        )\n",
+    "\n",
+    "        # t-based 95% confidence interval for the mean paired difference.\n",
+    "        mean_diff = float(np.mean(diffs))\n",
+    "        sd_diff = float(np.std(diffs, ddof=1))\n",
+    "        se_diff = sd_diff / math.sqrt(n_diffs)\n",
+    "        tcrit = tdist.ppf(0.975, df=n_diffs - 1)\n",
+    "        ci_low = mean_diff - tcrit * se_diff\n",
+    "        ci_high = mean_diff + tcrit * se_diff\n",
+    "\n",
+    "        rows.append(\n",
+    "            {\n",
+    "                \"task\": task,\n",
+    "                \"n_participants\": len(sub),\n",
+    "                \"ProtoSSL_mean\": sub[\"ProtoSSL\"].mean(),\n",
+    "                \"ProtoECGNet_mean\": sub[\"ProtoECGNet\"].mean(),\n",
+    "                \"mean_difference\": mean_diff,\n",
+    "                \"ci95_low\": ci_low,\n",
+    "                \"ci95_high\": ci_high,\n",
+    "                \"wilcoxon_W\": float(w.statistic),\n",
+    "                \"wilcoxon_p\": float(w.pvalue),\n",
+    "                \"paired_t_p\": float(ttest.pvalue),\n",
+    "                \"sign_test_p\": float(sign.pvalue),\n",
+    "            }\n",
+    "        )\n",
+    "\n",
+    "    return pd.DataFrame(rows)\n",
+    "\n",
+    "primary_results = participant_level_analysis(participant_summary)\n",
+    "primary_results\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2c182260",
+   "metadata": {},
+   "source": [
+    "### Per-label participant-level summaries\n",
+    "\n",
+    "These summaries show **where** the overall pattern comes from. They are reported as **descriptive only** (no hypothesis tests), because each label has only 5 cases.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "53b57b45",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>task</th>\n",
+       "      <th>label</th>\n",
+       "      <th>ProtoSSL_mean</th>\n",
+       "      <th>ProtoECGNet_mean</th>\n",
+       "      <th>mean_difference</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>global</td>\n",
+       "      <td>AMI</td>\n",
+       "      <td>0.8571</td>\n",
+       "      <td>0.6571</td>\n",
+       "      <td>0.2000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>global</td>\n",
+       "      <td>CLBBB</td>\n",
+       "      <td>0.9143</td>\n",
+       "      <td>0.9429</td>\n",
+       "      <td>-0.0286</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>global</td>\n",
+       "      <td>CRBBB</td>\n",
+       "      <td>0.9143</td>\n",
+       "      <td>0.5429</td>\n",
+       "      <td>0.3714</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>global</td>\n",
+       "      <td>PVC</td>\n",
+       "      <td>0.9714</td>\n",
+       "      <td>0.5143</td>\n",
+       "      <td>0.4571</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>AMI</td>\n",
+       "      <td>0.8000</td>\n",
+       "      <td>0.6571</td>\n",
+       "      <td>0.1429</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>CLBBB</td>\n",
+       "      <td>0.7714</td>\n",
+       "      <td>0.9429</td>\n",
+       "      <td>-0.1714</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>CRBBB</td>\n",
+       "      <td>0.8571</td>\n",
+       "      <td>0.5429</td>\n",
+       "      <td>0.3143</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>PVC</td>\n",
+       "      <td>0.8857</td>\n",
+       "      <td>0.5714</td>\n",
+       "      <td>0.3143</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     task  label  ProtoSSL_mean  ProtoECGNet_mean  mean_difference\n",
+       "0  global    AMI         0.8571            0.6571           0.2000\n",
+       "1  global  CLBBB         0.9143            0.9429          -0.0286\n",
+       "2  global  CRBBB         0.9143            0.5429           0.3714\n",
+       "3  global    PVC         0.9714            0.5143           0.4571\n",
+       "4  paired    AMI         0.8000            0.6571           0.1429\n",
+       "5  paired  CLBBB         0.7714            0.9429          -0.1714\n",
+       "6  paired  CRBBB         0.8571            0.5429           0.3143\n",
+       "7  paired    PVC         0.8857            0.5714           0.3143"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Per participant x task x label: mean \"good\" rate for each model, reshaped so the\n",
+    "# two models sit side by side, plus the paired difference (ProtoSSL minus ProtoECGNet).\n",
+    "label_means = yesno.groupby([\"participant\", \"task\", \"label\", \"model\"])[\"good\"].mean()\n",
+    "participant_by_label = label_means.unstack(\"model\").reset_index()\n",
+    "participant_by_label[\"difference\"] = participant_by_label[\"ProtoSSL\"].sub(\n",
+    "    participant_by_label[\"ProtoECGNet\"]\n",
+    ")\n",
+    "\n",
+    "# Average the participant-level values within each task/label combination.\n",
+    "by_task_label = participant_by_label.groupby([\"task\", \"label\"])\n",
+    "per_label_summary = by_task_label.agg(\n",
+    "    ProtoSSL_mean=(\"ProtoSSL\", \"mean\"),\n",
+    "    ProtoECGNet_mean=(\"ProtoECGNet\", \"mean\"),\n",
+    "    mean_difference=(\"difference\", \"mean\"),\n",
+    ").reset_index()\n",
+    "per_label_summary\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e1dd8d16",
+   "metadata": {},
+   "source": [
+    "### Descriptive summaries for the comparative A/B/Both/Neither question"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "06bb6c2f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>preference</th>\n",
+       "      <th>task</th>\n",
+       "      <th>Both</th>\n",
+       "      <th>Neither</th>\n",
+       "      <th>ProtoECGNet</th>\n",
+       "      <th>ProtoSSL</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>global</td>\n",
+       "      <td>51</td>\n",
+       "      <td>0</td>\n",
+       "      <td>25</td>\n",
+       "      <td>64</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>34</td>\n",
+       "      <td>6</td>\n",
+       "      <td>36</td>\n",
+       "      <td>64</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "preference    task  Both  Neither  ProtoECGNet  ProtoSSL\n",
+       "0           global    51        0           25        64\n",
+       "1           paired    34        6           36        64"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Number of times each preference option was chosen within each task;\n",
+    "# task/option combinations that never occur are filled with 0.\n",
+    "task_preference_counts = prefs.groupby([\"task\", \"preference\"]).size()\n",
+    "preference_overall = task_preference_counts.unstack(\n",
+    "    \"preference\", fill_value=0\n",
+    ").reset_index()\n",
+    "preference_overall\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "a324d62e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>preference</th>\n",
+       "      <th>task</th>\n",
+       "      <th>label</th>\n",
+       "      <th>Both</th>\n",
+       "      <th>Neither</th>\n",
+       "      <th>ProtoECGNet</th>\n",
+       "      <th>ProtoSSL</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>global</td>\n",
+       "      <td>AMI</td>\n",
+       "      <td>16</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5</td>\n",
+       "      <td>14</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>global</td>\n",
+       "      <td>CLBBB</td>\n",
+       "      <td>17</td>\n",
+       "      <td>0</td>\n",
+       "      <td>10</td>\n",
+       "      <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>global</td>\n",
+       "      <td>CRBBB</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5</td>\n",
+       "      <td>22</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>global</td>\n",
+       "      <td>PVC</td>\n",
+       "      <td>10</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5</td>\n",
+       "      <td>20</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>AMI</td>\n",
+       "      <td>8</td>\n",
+       "      <td>3</td>\n",
+       "      <td>9</td>\n",
+       "      <td>15</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>CLBBB</td>\n",
+       "      <td>12</td>\n",
+       "      <td>2</td>\n",
+       "      <td>17</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>CRBBB</td>\n",
+       "      <td>8</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5</td>\n",
+       "      <td>22</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>PVC</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>23</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "preference    task  label  Both  Neither  ProtoECGNet  ProtoSSL\n",
+       "0           global    AMI    16        0            5        14\n",
+       "1           global  CLBBB    17        0           10         8\n",
+       "2           global  CRBBB     8        0            5        22\n",
+       "3           global    PVC    10        0            5        20\n",
+       "4           paired    AMI     8        3            9        15\n",
+       "5           paired  CLBBB    12        2           17         4\n",
+       "6           paired  CRBBB     8        0            5        22\n",
+       "7           paired    PVC     6        1            5        23"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Same preference counts as above, broken down by task and label;\n",
+    "# combinations that never occur are filled with 0.\n",
+    "label_preference_counts = prefs.groupby([\"task\", \"label\", \"preference\"]).size()\n",
+    "preference_by_label = label_preference_counts.unstack(\n",
+    "    \"preference\", fill_value=0\n",
+    ").reset_index()\n",
+    "preference_by_label\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0aba0956",
+   "metadata": {},
+   "source": [
+    "### Fleiss' kappa for the binary yes/no ratings\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "0d869989",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overall Fleiss' kappa: 0.2877\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/9j/f0qlzhxj2klgf3bxm77sqz300000gn/T/ipykernel_25474/3937073973.py:14: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
+      "  .apply(fleiss_from_yesno)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>label</th>\n",
+       "      <th>fleiss_kappa</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>AMI</td>\n",
+       "      <td>0.2023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>CLBBB</td>\n",
+       "      <td>0.0542</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>CRBBB</td>\n",
+       "      <td>0.3000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>PVC</td>\n",
+       "      <td>0.4000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   label  fleiss_kappa\n",
+       "0    AMI        0.2023\n",
+       "1  CLBBB        0.0542\n",
+       "2  CRBBB        0.3000\n",
+       "3    PVC        0.4000"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def fleiss_from_yesno(df):\n",
+    "    \"\"\"Return Fleiss' kappa for the binary ``good`` ratings in ``df``.\n",
+    "\n",
+    "    Every (case_id, task, model) group is one rated subject; its row in the\n",
+    "    table handed to ``fleiss_kappa`` holds the number of 0 and 1 ratings.\n",
+    "\n",
+    "    NOTE(review): Fleiss' kappa presumes each subject received the same number\n",
+    "    of ratings; that is not checked here -- confirm upstream.\n",
+    "    \"\"\"\n",
+    "    table = [\n",
+    "        ratings[\"good\"].value_counts().reindex([0, 1], fill_value=0).values\n",
+    "        for _, ratings in df.groupby([\"case_id\", \"task\", \"model\"])\n",
+    "    ]\n",
+    "    return float(fleiss_kappa(np.asarray(table)))\n",
+    "\n",
+    "\n",
+    "overall_kappa = fleiss_from_yesno(yesno)\n",
+    "\n",
+    "# Iterate over the label groups directly instead of using groupby().apply():\n",
+    "# apply() operating on the grouping columns is deprecated in pandas and\n",
+    "# emits a DeprecationWarning.\n",
+    "kappa_by_label = pd.DataFrame(\n",
+    "    [\n",
+    "        {\"label\": label, \"fleiss_kappa\": fleiss_from_yesno(label_ratings)}\n",
+    "        for label, label_ratings in yesno.groupby(\"label\")\n",
+    "    ],\n",
+    "    columns=[\"label\", \"fleiss_kappa\"],\n",
+    ")\n",
+    "\n",
+    "print(\"Overall Fleiss' kappa:\", round(overall_kappa, 4))\n",
+    "kappa_by_label\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "80bfa9be",
+   "metadata": {},
+   "source": [
+    "### Compact summary tables"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "64519600",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>task</th>\n",
+       "      <th>n_participants</th>\n",
+       "      <th>ProtoSSL_mean</th>\n",
+       "      <th>ProtoECGNet_mean</th>\n",
+       "      <th>mean_difference</th>\n",
+       "      <th>ci95_low</th>\n",
+       "      <th>ci95_high</th>\n",
+       "      <th>wilcoxon_W</th>\n",
+       "      <th>wilcoxon_p</th>\n",
+       "      <th>paired_t_p</th>\n",
+       "      <th>sign_test_p</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>global</td>\n",
+       "      <td>7</td>\n",
+       "      <td>0.9143</td>\n",
+       "      <td>0.6643</td>\n",
+       "      <td>0.2500</td>\n",
+       "      <td>0.1432</td>\n",
+       "      <td>0.3568</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>0.0156</td>\n",
+       "      <td>0.0012</td>\n",
+       "      <td>0.0156</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>7</td>\n",
+       "      <td>0.8286</td>\n",
+       "      <td>0.6786</td>\n",
+       "      <td>0.1500</td>\n",
+       "      <td>0.0501</td>\n",
+       "      <td>0.2499</td>\n",
+       "      <td>0.0000</td>\n",
+       "      <td>0.0156</td>\n",
+       "      <td>0.0104</td>\n",
+       "      <td>0.0156</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     task  n_participants  ProtoSSL_mean  ProtoECGNet_mean  mean_difference  ci95_low  ci95_high  wilcoxon_W  wilcoxon_p  paired_t_p  sign_test_p\n",
+       "0  global               7         0.9143            0.6643           0.2500    0.1432     0.3568      0.0000      0.0156      0.0012       0.0156\n",
+       "1  paired               7         0.8286            0.6786           0.1500    0.0501     0.2499      0.0000      0.0156      0.0104       0.0156"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Display copy of the primary results with the reported statistics rounded to 4 decimals.\n",
+    "rounded_columns = [\n",
+    "    \"ProtoSSL_mean\",\n",
+    "    \"ProtoECGNet_mean\",\n",
+    "    \"mean_difference\",\n",
+    "    \"ci95_low\",\n",
+    "    \"ci95_high\",\n",
+    "    \"wilcoxon_p\",\n",
+    "    \"paired_t_p\",\n",
+    "    \"sign_test_p\",\n",
+    "]\n",
+    "primary_results_rounded = primary_results.assign(\n",
+    "    **{name: primary_results[name].round(4) for name in rounded_columns}\n",
+    ")\n",
+    "primary_results_rounded\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "67c346fd",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>model</th>\n",
+       "      <th>task</th>\n",
+       "      <th>label</th>\n",
+       "      <th>ProtoECGNet</th>\n",
+       "      <th>ProtoSSL</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>global</td>\n",
+       "      <td>AMI</td>\n",
+       "      <td>23/35 (65.7%)</td>\n",
+       "      <td>30/35 (85.7%)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>global</td>\n",
+       "      <td>CLBBB</td>\n",
+       "      <td>33/35 (94.3%)</td>\n",
+       "      <td>32/35 (91.4%)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>global</td>\n",
+       "      <td>CRBBB</td>\n",
+       "      <td>19/35 (54.3%)</td>\n",
+       "      <td>32/35 (91.4%)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>global</td>\n",
+       "      <td>PVC</td>\n",
+       "      <td>18/35 (51.4%)</td>\n",
+       "      <td>34/35 (97.1%)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>AMI</td>\n",
+       "      <td>23/35 (65.7%)</td>\n",
+       "      <td>28/35 (80.0%)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>CLBBB</td>\n",
+       "      <td>33/35 (94.3%)</td>\n",
+       "      <td>27/35 (77.1%)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>CRBBB</td>\n",
+       "      <td>19/35 (54.3%)</td>\n",
+       "      <td>30/35 (85.7%)</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>paired</td>\n",
+       "      <td>PVC</td>\n",
+       "      <td>20/35 (57.1%)</td>\n",
+       "      <td>31/35 (88.6%)</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "model    task  label    ProtoECGNet       ProtoSSL\n",
+       "0      global    AMI  23/35 (65.7%)  30/35 (85.7%)\n",
+       "1      global  CLBBB  33/35 (94.3%)  32/35 (91.4%)\n",
+       "2      global  CRBBB  19/35 (54.3%)  32/35 (91.4%)\n",
+       "3      global    PVC  18/35 (51.4%)  34/35 (97.1%)\n",
+       "4      paired    AMI  23/35 (65.7%)  28/35 (80.0%)\n",
+       "5      paired  CLBBB  33/35 (94.3%)  27/35 (77.1%)\n",
+       "6      paired  CRBBB  19/35 (54.3%)  30/35 (85.7%)\n",
+       "7      paired    PVC  20/35 (57.1%)  31/35 (88.6%)"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Build one \"n_yes/n_total (pct%)\" string per task/label/model row, then spread\n",
+    "# the models into columns so each task/label pair occupies a single row.\n",
+    "yes_text = label_yesno[\"n_yes\"].astype(str)\n",
+    "total_text = label_yesno[\"n_total\"].astype(str)\n",
+    "percent_text = (100 * label_yesno[\"proportion\"]).round(1).astype(str)\n",
+    "\n",
+    "label_yesno_pivot = (\n",
+    "    label_yesno[[\"task\", \"label\", \"model\"]]\n",
+    "    .assign(summary=yes_text + \"/\" + total_text + \" (\" + percent_text + \"%)\")\n",
+    "    .pivot(index=[\"task\", \"label\"], columns=\"model\", values=\"summary\")\n",
+    "    .reset_index()\n",
+    ")\n",
+    "label_yesno_pivot\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "51807e2d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "900aa255",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "ecg_env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

	record_id	redcap_survey_identifier	user_study_form_timestamp	consent	prototypes_quality_choices	prototypea_quality	prototypeb_quality	prototypes_quality_choices_2	explanation_a	explanation_b	case1_prototypes_quality_choices	case1_prototypea_quality	case1_prototypeb_quality	case1_prototypes_quality_choices_2	case1_explanation_a	case1_explanation_b	case2_prototypes_quality_choices	case2_prototypea_quality	case2_prototypeb_quality	case2_prototypes_quality_choices_2	case2_explanation_a	case2_explanation_b	case3_prototypes_quality_choices	case3_prototypea_quality	case3_prototypeb_quality	case3_prototypes_quality_choices_2	case3_explanation_a	case3_explanation_b	case4_prototypes_quality_choices	case4_prototypea_quality	case4_prototypeb_quality	case4_prototypes_quality_choices_2	case4_explanation_a	case4_explanation_b	case5_prototypes_quality_choices	case5_prototypea_quality	case5_prototypeb_quality	case5_prototypes_quality_choices_2	case5_explanation_a	case5_explanation_b	case6_prototypes_quality_choices	case6_prototypea_quality	case6_prototypeb_quality	case6_prototypes_quality_choices_2	case6_explanation_a	case6_explanation_b	case7_prototypes_quality_choices	case7_prototypea_quality	case7_prototypeb_quality	case7_prototypes_quality_choices_2	case7_explanation_a	case7_explanation_b	case8_prototypes_quality_choices	case8_prototypea_quality	case8_prototypeb_quality	case8_prototypes_quality_choices_2	case8_explanation_a	case8_explanation_b	case9_prototypes_quality_choices	case9_prototypea_quality	case9_prototypeb_quality	case9_prototypes_quality_choices_2	case9_explanation_a	case9_explanation_b	case10_prototypes_quality_choices	case10_prototypea_quality	case10_prototypeb_quality	case10_prototypes_quality_choices_2	case10_explanation_a	case10_explanation_b	case11_prototypes_quality_choices	case11_prototypea_quality	case11_prototypeb_quality	case11_prototypes_quality_choices_2	case11_explanation_a	case11_explanation_b	case12_prototypes_quality_choices	
case12_prototypea_quality	case12_prototypeb_quality	case12_prototypes_quality_choices_2	case12_explanation_a	case12_explanation_b	case13_prototypes_quality_choices	case13_prototypea_quality	case13_prototypeb_quality	case13_prototypes_quality_choices_2	case13_explanation_a	case13_explanation_b	case14_prototypes_quality_choices	case14_prototypea_quality	case14_prototypeb_quality	case14_prototypes_quality_choices_2	case14_explanation_a	case14_explanation_b	case15_prototypes_quality_choices	case15_prototypea_quality	case15_prototypeb_quality	case15_prototypes_quality_choices_2	case15_explanation_a	case15_explanation_b	case16_prototypes_quality_choices	case16_prototypea_quality	case16_prototypeb_quality	case16_prototypes_quality_choices_2	case16_explanation_a	case16_explanation_b	case17_prototypes_quality_choices	case17_prototypea_quality	case17_prototypeb_quality	case17_prototypes_quality_choices_2	case17_explanation_a	case17_explanation_b	case18_prototypes_quality_choices	case18_prototypea_quality	case18_prototypeb_quality	case18_prototypes_quality_choices_2	case18_explanation_a	case18_explanation_b	case19_prototypes_quality_choices	case19_prototypea_quality	case19_prototypeb_quality	case19_prototypes_quality_choices_2	case19_explanation_a	case19_explanation_b	case20_prototypes_quality_choices	case20_prototypea_quality	case20_prototypeb_quality	case20_prototypes_quality_choices_2	case20_explanation_a	case20_explanation_b	user_study_form_complete
0	5	NaN	2026-04-06 18:18:56	1	2	0	1	2	0	1	3	1	1	3	1	1	1	1	1	1	1	1	2	1	1	2	1	1	3	1	1	1	1	1	3	1	1	3	1	1	3	1	1	3	1	1	3	1	1	3	1	1	3	1	1	3	1	1	3	1	1	1	1	1	2	1	1	2	1	1	2	1	1	3	1	1	2	1	1	3	1	1	3	1	1	3	1	1	3	1	1	3	1	1	3	1	1	3	1	1	2	0	1	2	0	1	3	1	1	1	1	1	2	0	1	2	0	1	3	1	1	3	1	1	3	1	1	2	1	1	2
1	7	NaN	2026-04-09 11:26:53	1	2	0	1	2	0	1	3	1	1	1	1	1	3	1	1	3	1	1	3	0	0	2	0	1	1	1	0	1	1	0	3	1	1	3	1	1	1	1	0	1	1	0	1	1	1	1	1	0	3	1	1	3	1	1	3	1	1	1	1	0	3	1	1	2	1	0	1	1	0	1	1	0	2	1	1	3	1	1	2	0	1	3	1	1	3	1	1	1	1	1	3	1	1	2	1	1	2	0	1	2	0	1	1	1	0	1	1	0	2	0	1	2	0	1	1	1	1	1	1	0	2	0	1	2	0	1	2
	participant	case_id	label	task	model	good
0	5	1	AMI	global	ProtoSSL	1
1	5	1	AMI	global	ProtoECGNet	1
2	5	1	AMI	paired	ProtoSSL	1
3	5	1	AMI	paired	ProtoECGNet	1
4	5	2	AMI	global	ProtoSSL	1
	task	model	n_yes	n_total	proportion
0	global	ProtoECGNet	93	140	0.6643
1	global	ProtoSSL	128	140	0.9143
2	paired	ProtoECGNet	95	140	0.6786
3	paired	ProtoSSL	116	140	0.8286
	task	label	model	n_yes	n_total	proportion
0	global	AMI	ProtoECGNet	23	35	0.6571
1	global	AMI	ProtoSSL	30	35	0.8571
2	global	CLBBB	ProtoECGNet	33	35	0.9429
3	global	CLBBB	ProtoSSL	32	35	0.9143
4	global	CRBBB	ProtoECGNet	19	35	0.5429
5	global	CRBBB	ProtoSSL	32	35	0.9143
6	global	PVC	ProtoECGNet	18	35	0.5143
7	global	PVC	ProtoSSL	34	35	0.9714
8	paired	AMI	ProtoECGNet	23	35	0.6571
9	paired	AMI	ProtoSSL	28	35	0.8000
10	paired	CLBBB	ProtoECGNet	33	35	0.9429
11	paired	CLBBB	ProtoSSL	27	35	0.7714
12	paired	CRBBB	ProtoECGNet	19	35	0.5429
13	paired	CRBBB	ProtoSSL	30	35	0.8571
14	paired	PVC	ProtoECGNet	20	35	0.5714
15	paired	PVC	ProtoSSL	31	35	0.8857
model	participant	task	ProtoECGNet	ProtoSSL	difference
0	5	global	0.9000	1.0000	0.1000
1	5	paired	0.9000	1.0000	0.1000
2	7	global	0.7000	0.8000	0.1000
3	7	paired	0.6000	0.8000	0.2000
4	8	global	0.4500	0.8000	0.3500
5	8	paired	0.5000	0.5500	0.0500
6	9	global	0.6000	0.9500	0.3500
7	9	paired	0.5500	0.9000	0.3500
8	10	global	0.6000	0.9500	0.3500
9	10	paired	0.7500	0.9500	0.2000
10	11	global	0.6500	0.9500	0.3000
11	11	paired	0.8000	0.9000	0.1000
12	12	global	0.7500	0.9500	0.2000
13	12	paired	0.6500	0.7000	0.0500
model	task	label	ProtoECGNet	ProtoSSL
0	global	AMI	23/35 (65.7%)	30/35 (85.7%)
1	global	CLBBB	33/35 (94.3%)	32/35 (91.4%)
2	global	CRBBB	19/35 (54.3%)	32/35 (91.4%)
3	global	PVC	18/35 (51.4%)	34/35 (97.1%)
4	paired	AMI	23/35 (65.7%)	28/35 (80.0%)
5	paired	CLBBB	33/35 (94.3%)	27/35 (77.1%)
6	paired	CRBBB	19/35 (54.3%)	30/35 (85.7%)
7	paired	PVC	20/35 (57.1%)	31/35 (88.6%)