diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 864d864..c2141c0 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -108,3 +108,4 @@ user-study/images @StevenSong user-study/metadata.csv @StevenSong user-study/prepare_samples.ipynb @StevenSong user-study/results.csv @StevenSong +user-study/user_study_analysis.ipynb @sahilsethi0105 diff --git a/user-study/user_study_analysis.ipynb b/user-study/user_study_analysis.ipynb new file mode 100644 index 0000000..21d249f --- /dev/null +++ b/user-study/user_study_analysis.ipynb @@ -0,0 +1,2176 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a8401e54", + "metadata": {}, + "source": [ + "# ProtoSSL User study analysis\n", + "\n", + "This notebook conducts:\n", + "\n", + "- **Primary analysis:** participant-level paired comparison of the proportion of responses rated as good for ProtoSSL vs ProtoECGNet, done separately for the two tasks.\n", + "- **Primary test:** two-sided **Wilcoxon signed-rank test** across participants.\n", + "- **Comparative A/B/Both/Neither question:** descriptive summaries\n", + "- **Inter-rater agreement:** **Fleiss' kappa** for the binary yes/no ratings, reported overall and by label.\n", + "\n", + "The **participants** are the primary unit of inference.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "73ba5be4", + "metadata": {}, + "outputs": [], + "source": [ + "import math\n", + "import numpy as np\n", + "import pandas as pd\n", + "from scipy.stats import wilcoxon, ttest_rel, binomtest, t as tdist\n", + "from statsmodels.stats.inter_rater import fleiss_kappa\n", + "\n", + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.width\", 200)\n", + "pd.set_option(\"display.float_format\", lambda x: f\"{x:.4f}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d094a664", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "results shape: (7, 131)\n", + "metadata shape: (20, 15)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
record_idredcap_survey_identifieruser_study_form_timestampconsentprototypes_quality_choicesprototypea_qualityprototypeb_qualityprototypes_quality_choices_2explanation_aexplanation_bcase1_prototypes_quality_choicescase1_prototypea_qualitycase1_prototypeb_qualitycase1_prototypes_quality_choices_2case1_explanation_acase1_explanation_bcase2_prototypes_quality_choicescase2_prototypea_qualitycase2_prototypeb_qualitycase2_prototypes_quality_choices_2case2_explanation_acase2_explanation_bcase3_prototypes_quality_choicescase3_prototypea_qualitycase3_prototypeb_qualitycase3_prototypes_quality_choices_2case3_explanation_acase3_explanation_bcase4_prototypes_quality_choicescase4_prototypea_qualitycase4_prototypeb_qualitycase4_prototypes_quality_choices_2case4_explanation_acase4_explanation_bcase5_prototypes_quality_choicescase5_prototypea_qualitycase5_prototypeb_qualitycase5_prototypes_quality_choices_2case5_explanation_acase5_explanation_bcase6_prototypes_quality_choicescase6_prototypea_qualitycase6_prototypeb_qualitycase6_prototypes_quality_choices_2case6_explanation_acase6_explanation_bcase7_prototypes_quality_choicescase7_prototypea_qualitycase7_prototypeb_qualitycase7_prototypes_quality_choices_2case7_explanation_acase7_explanation_bcase8_prototypes_quality_choicescase8_prototypea_qualitycase8_prototypeb_qualitycase8_prototypes_quality_choices_2case8_explanation_acase8_explanation_bcase9_prototypes_quality_choicescase9_prototypea_qualitycase9_prototypeb_qualitycase9_prototypes_quality_choices_2case9_explanation_acase9_explanation_bcase10_prototypes_quality_choicescase10_prototypea_qualitycase10_prototypeb_qualitycase10_prototypes_quality_choices_2case10_explanation_acase10_explanation_bcase11_prototypes_quality_choicescase11_prototypea_qualitycase11_prototypeb_qualitycase11_prototypes_quality_choices_2case11_explanation_acase11_explanation_bcase12_prototypes_quality_choicescase12_prototypea_qualitycase12_prototypeb_qualitycase12_prototypes_quality_choices_2case12_explanation_acase12_explanation_bcase13_prototypes_quality_choicescase13_prototypea_qualitycase13_prototypeb_qualitycase13_prototypes_quality_choices_2case13_explanation_acase13_explanation_bcase14_prototypes_quality_choicescase14_prototypea_qualitycase14_prototypeb_qualitycase14_prototypes_quality_choices_2case14_explanation_acase14_explanation_bcase15_prototypes_quality_choicescase15_prototypea_qualitycase15_prototypeb_qualitycase15_prototypes_quality_choices_2case15_explanation_acase15_explanation_bcase16_prototypes_quality_choicescase16_prototypea_qualitycase16_prototypeb_qualitycase16_prototypes_quality_choices_2case16_explanation_acase16_explanation_bcase17_prototypes_quality_choicescase17_prototypea_qualitycase17_prototypeb_qualitycase17_prototypes_quality_choices_2case17_explanation_acase17_explanation_bcase18_prototypes_quality_choicescase18_prototypea_qualitycase18_prototypeb_qualitycase18_prototypes_quality_choices_2case18_explanation_acase18_explanation_bcase19_prototypes_quality_choicescase19_prototypea_qualitycase19_prototypeb_qualitycase19_prototypes_quality_choices_2case19_explanation_acase19_explanation_bcase20_prototypes_quality_choicescase20_prototypea_qualitycase20_prototypeb_qualitycase20_prototypes_quality_choices_2case20_explanation_acase20_explanation_buser_study_form_complete
05NaN2026-04-06 18:18:5612012013113111111112112113111113113113113113113113113113111112112112113112113113113113113113113112012013111112012013113113112112
17NaN2026-04-09 11:26:5312012013111113113113002011101103113111101101111103113113111103112101101102113112013113111113112112012011101102012011111102012012
\n", + "
" + ], + "text/plain": [ + " record_id redcap_survey_identifier user_study_form_timestamp consent prototypes_quality_choices prototypea_quality prototypeb_quality prototypes_quality_choices_2 explanation_a \\\n", + "0 5 NaN 2026-04-06 18:18:56 1 2 0 1 2 0 \n", + "1 7 NaN 2026-04-09 11:26:53 1 2 0 1 2 0 \n", + "\n", + " explanation_b case1_prototypes_quality_choices case1_prototypea_quality case1_prototypeb_quality case1_prototypes_quality_choices_2 case1_explanation_a case1_explanation_b \\\n", + "0 1 3 1 1 3 1 1 \n", + "1 1 3 1 1 1 1 1 \n", + "\n", + " case2_prototypes_quality_choices case2_prototypea_quality case2_prototypeb_quality case2_prototypes_quality_choices_2 case2_explanation_a case2_explanation_b \\\n", + "0 1 1 1 1 1 1 \n", + "1 3 1 1 3 1 1 \n", + "\n", + " case3_prototypes_quality_choices case3_prototypea_quality case3_prototypeb_quality case3_prototypes_quality_choices_2 case3_explanation_a case3_explanation_b \\\n", + "0 2 1 1 2 1 1 \n", + "1 3 0 0 2 0 1 \n", + "\n", + " case4_prototypes_quality_choices case4_prototypea_quality case4_prototypeb_quality case4_prototypes_quality_choices_2 case4_explanation_a case4_explanation_b \\\n", + "0 3 1 1 1 1 1 \n", + "1 1 1 0 1 1 0 \n", + "\n", + " case5_prototypes_quality_choices case5_prototypea_quality case5_prototypeb_quality case5_prototypes_quality_choices_2 case5_explanation_a case5_explanation_b \\\n", + "0 3 1 1 3 1 1 \n", + "1 3 1 1 3 1 1 \n", + "\n", + " case6_prototypes_quality_choices case6_prototypea_quality case6_prototypeb_quality case6_prototypes_quality_choices_2 case6_explanation_a case6_explanation_b \\\n", + "0 3 1 1 3 1 1 \n", + "1 1 1 0 1 1 0 \n", + "\n", + " case7_prototypes_quality_choices case7_prototypea_quality case7_prototypeb_quality case7_prototypes_quality_choices_2 case7_explanation_a case7_explanation_b \\\n", + "0 3 1 1 3 1 1 \n", + "1 1 1 1 1 1 0 \n", + "\n", + " case8_prototypes_quality_choices case8_prototypea_quality case8_prototypeb_quality case8_prototypes_quality_choices_2 case8_explanation_a case8_explanation_b \\\n", + "0 3 1 1 3 1 1 \n", + "1 3 1 1 3 1 1 \n", + "\n", + " case9_prototypes_quality_choices case9_prototypea_quality case9_prototypeb_quality case9_prototypes_quality_choices_2 case9_explanation_a case9_explanation_b \\\n", + "0 3 1 1 1 1 1 \n", + "1 3 1 1 1 1 0 \n", + "\n", + " case10_prototypes_quality_choices case10_prototypea_quality case10_prototypeb_quality case10_prototypes_quality_choices_2 case10_explanation_a case10_explanation_b \\\n", + "0 2 1 1 2 1 1 \n", + "1 3 1 1 2 1 0 \n", + "\n", + " case11_prototypes_quality_choices case11_prototypea_quality case11_prototypeb_quality case11_prototypes_quality_choices_2 case11_explanation_a case11_explanation_b \\\n", + "0 2 1 1 3 1 1 \n", + "1 1 1 0 1 1 0 \n", + "\n", + " case12_prototypes_quality_choices case12_prototypea_quality case12_prototypeb_quality case12_prototypes_quality_choices_2 case12_explanation_a case12_explanation_b \\\n", + "0 2 1 1 3 1 1 \n", + "1 2 1 1 3 1 1 \n", + "\n", + " case13_prototypes_quality_choices case13_prototypea_quality case13_prototypeb_quality case13_prototypes_quality_choices_2 case13_explanation_a case13_explanation_b \\\n", + "0 3 1 1 3 1 1 \n", + "1 2 0 1 3 1 1 \n", + "\n", + " case14_prototypes_quality_choices case14_prototypea_quality case14_prototypeb_quality case14_prototypes_quality_choices_2 case14_explanation_a case14_explanation_b \\\n", + "0 3 1 1 3 1 1 \n", + "1 3 1 1 1 1 1 \n", + "\n", + " case15_prototypes_quality_choices case15_prototypea_quality case15_prototypeb_quality case15_prototypes_quality_choices_2 case15_explanation_a case15_explanation_b \\\n", + "0 3 1 1 3 1 1 \n", + "1 3 1 1 2 1 1 \n", + "\n", + " case16_prototypes_quality_choices case16_prototypea_quality case16_prototypeb_quality case16_prototypes_quality_choices_2 case16_explanation_a case16_explanation_b \\\n", + "0 2 0 1 2 0 1 \n", + "1 2 0 1 2 0 1 \n", + "\n", + " case17_prototypes_quality_choices case17_prototypea_quality case17_prototypeb_quality case17_prototypes_quality_choices_2 case17_explanation_a case17_explanation_b \\\n", + "0 3 1 1 1 1 1 \n", + "1 1 1 0 1 1 0 \n", + "\n", + " case18_prototypes_quality_choices case18_prototypea_quality case18_prototypeb_quality case18_prototypes_quality_choices_2 case18_explanation_a case18_explanation_b \\\n", + "0 2 0 1 2 0 1 \n", + "1 2 0 1 2 0 1 \n", + "\n", + " case19_prototypes_quality_choices case19_prototypea_quality case19_prototypeb_quality case19_prototypes_quality_choices_2 case19_explanation_a case19_explanation_b \\\n", + "0 3 1 1 3 1 1 \n", + "1 1 1 1 1 1 0 \n", + "\n", + " case20_prototypes_quality_choices case20_prototypea_quality case20_prototypeb_quality case20_prototypes_quality_choices_2 case20_explanation_a case20_explanation_b user_study_form_complete \n", + "0 3 1 1 2 1 1 2 \n", + "1 2 0 1 2 0 1 2 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results = pd.read_csv(\"results.csv\")\n", + "metadata = pd.read_csv(\"metadata.csv\")\n", + "\n", + "print(\"results shape:\", results.shape)\n", + "print(\"metadata shape:\", metadata.shape)\n", + "results.head(2)\n" + ] + }, + { + "cell_type": "markdown", + "id": "7f870f0f", + "metadata": {}, + "source": [ + "### Decode REDCap responses into analysis tables\n", + "\n", + "`yesno` contains one row per participant × case × task × model for the binary yes/no questions.\n", + "\n", + "`prefs` contains one row per participant × case × task for the A/B/Both/Neither comparative question, decoded back to the actual model identities using the metadata file.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "16acb6b2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "yesno shape: (560, 6)\n", + "prefs shape: (280, 5)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
participantcase_idlabeltaskmodelgood
051AMIglobalProtoSSL1
151AMIglobalProtoECGNet1
251AMIpairedProtoSSL1
351AMIpairedProtoECGNet1
452AMIglobalProtoSSL1
\n", + "
" + ], + "text/plain": [ + " participant case_id label task model good\n", + "0 5 1 AMI global ProtoSSL 1\n", + "1 5 1 AMI global ProtoECGNet 1\n", + "2 5 1 AMI paired ProtoSSL 1\n", + "3 5 1 AMI paired ProtoECGNet 1\n", + "4 5 2 AMI global ProtoSSL 1" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "case_map = (\n", + " metadata.rename(\n", + " columns={\n", + " \"Study Index\": \"case_id\",\n", + " \"Label\": \"label\",\n", + " \"ProtoSSL Assignment\": \"ssl_assignment\",\n", + " \"ProtoECGNet Assignment\": \"ecg_assignment\",\n", + " }\n", + " )[[\"case_id\", \"label\", \"ssl_assignment\", \"ecg_assignment\"]]\n", + " .copy()\n", + ")\n", + "case_map[\"case_id\"] = case_map[\"case_id\"].astype(int)\n", + "\n", + "pref_code = {1: \"A\", 2: \"B\", 3: \"Both\", 4: \"Neither\"}\n", + "\n", + "yes_rows = []\n", + "pref_rows = []\n", + "\n", + "for _, row in results.iterrows():\n", + " participant = int(row[\"record_id\"])\n", + " for case_id in range(1, 21):\n", + " meta_row = case_map.loc[case_map[\"case_id\"] == case_id].iloc[0]\n", + "\n", + " task_specs = [\n", + " (\"global\",\n", + " f\"case{case_id}_prototypea_quality\",\n", + " f\"case{case_id}_prototypeb_quality\",\n", + " f\"case{case_id}_prototypes_quality_choices\"),\n", + " (\"paired\",\n", + " f\"case{case_id}_explanation_a\",\n", + " f\"case{case_id}_explanation_b\",\n", + " f\"case{case_id}_prototypes_quality_choices_2\"),\n", + " ]\n", + "\n", + " for task, a_col, b_col, pref_col in task_specs:\n", + " for shown_letter, col in [(\"A\", a_col), (\"B\", b_col)]:\n", + " actual_model = \"ProtoSSL\" if meta_row[\"ssl_assignment\"] == shown_letter else \"ProtoECGNet\"\n", + " yes_rows.append(\n", + " {\n", + " \"participant\": participant,\n", + " \"case_id\": case_id,\n", + " \"label\": meta_row[\"label\"],\n", + " \"task\": task,\n", + " \"model\": actual_model,\n", + " \"good\": int(row[col]),\n", + " }\n", + " )\n", + "\n", + " pref_value = pref_code[int(row[pref_col])]\n", + " if pref_value in [\"A\", \"B\"]:\n", + " actual_pref = \"ProtoSSL\" if meta_row[\"ssl_assignment\"] == pref_value else \"ProtoECGNet\"\n", + " else:\n", + " actual_pref = pref_value\n", + "\n", + " pref_rows.append(\n", + " {\n", + " \"participant\": participant,\n", + " \"case_id\": case_id,\n", + " \"label\": meta_row[\"label\"],\n", + " \"task\": task,\n", + " \"preference\": actual_pref,\n", + " }\n", + " )\n", + "\n", + "yesno = pd.DataFrame(yes_rows)\n", + "prefs = pd.DataFrame(pref_rows)\n", + "\n", + "print(\"yesno shape:\", yesno.shape)\n", + "print(\"prefs shape:\", prefs.shape)\n", + "yesno.head()\n" + ] + }, + { + "cell_type": "markdown", + "id": "0dce4296", + "metadata": {}, + "source": [ + "### Descriptive summaries for the binary yes/no questions" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "961b8054", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
taskmodeln_yesn_totalproportion
0globalProtoECGNet931400.6643
1globalProtoSSL1281400.9143
2pairedProtoECGNet951400.6786
3pairedProtoSSL1161400.8286
\n", + "
" + ], + "text/plain": [ + " task model n_yes n_total proportion\n", + "0 global ProtoECGNet 93 140 0.6643\n", + "1 global ProtoSSL 128 140 0.9143\n", + "2 paired ProtoECGNet 95 140 0.6786\n", + "3 paired ProtoSSL 116 140 0.8286" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "overall_yesno = (\n", + " yesno.groupby([\"task\", \"model\"])[\"good\"]\n", + " .agg(n_yes=\"sum\", n_total=\"count\", proportion=\"mean\")\n", + " .reset_index()\n", + ")\n", + "overall_yesno\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "da09502f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tasklabelmodeln_yesn_totalproportion
0globalAMIProtoECGNet23350.6571
1globalAMIProtoSSL30350.8571
2globalCLBBBProtoECGNet33350.9429
3globalCLBBBProtoSSL32350.9143
4globalCRBBBProtoECGNet19350.5429
5globalCRBBBProtoSSL32350.9143
6globalPVCProtoECGNet18350.5143
7globalPVCProtoSSL34350.9714
8pairedAMIProtoECGNet23350.6571
9pairedAMIProtoSSL28350.8000
10pairedCLBBBProtoECGNet33350.9429
11pairedCLBBBProtoSSL27350.7714
12pairedCRBBBProtoECGNet19350.5429
13pairedCRBBBProtoSSL30350.8571
14pairedPVCProtoECGNet20350.5714
15pairedPVCProtoSSL31350.8857
\n", + "
" + ], + "text/plain": [ + " task label model n_yes n_total proportion\n", + "0 global AMI ProtoECGNet 23 35 0.6571\n", + "1 global AMI ProtoSSL 30 35 0.8571\n", + "2 global CLBBB ProtoECGNet 33 35 0.9429\n", + "3 global CLBBB ProtoSSL 32 35 0.9143\n", + "4 global CRBBB ProtoECGNet 19 35 0.5429\n", + "5 global CRBBB ProtoSSL 32 35 0.9143\n", + "6 global PVC ProtoECGNet 18 35 0.5143\n", + "7 global PVC ProtoSSL 34 35 0.9714\n", + "8 paired AMI ProtoECGNet 23 35 0.6571\n", + "9 paired AMI ProtoSSL 28 35 0.8000\n", + "10 paired CLBBB ProtoECGNet 33 35 0.9429\n", + "11 paired CLBBB ProtoSSL 27 35 0.7714\n", + "12 paired CRBBB ProtoECGNet 19 35 0.5429\n", + "13 paired CRBBB ProtoSSL 30 35 0.8571\n", + "14 paired PVC ProtoECGNet 20 35 0.5714\n", + "15 paired PVC ProtoSSL 31 35 0.8857" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "label_yesno = (\n", + " yesno.groupby([\"task\", \"label\", \"model\"])[\"good\"]\n", + " .agg(n_yes=\"sum\", n_total=\"count\", proportion=\"mean\")\n", + " .reset_index()\n", + ")\n", + "label_yesno\n" + ] + }, + { + "cell_type": "markdown", + "id": "03a50af7", + "metadata": {}, + "source": [ + "### Primary analysis\n", + "\n", + "For each participant and each task, compute the proportion of responses rated as good for each model across the 20 cases. Then compare ProtoSSL vs ProtoECGNet with a **paired Wilcoxon signed-rank test**.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "3194ed44", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
modelparticipanttaskProtoECGNetProtoSSLdifference
05global0.90001.00000.1000
15paired0.90001.00000.1000
27global0.70000.80000.1000
37paired0.60000.80000.2000
48global0.45000.80000.3500
58paired0.50000.55000.0500
69global0.60000.95000.3500
79paired0.55000.90000.3500
810global0.60000.95000.3500
910paired0.75000.95000.2000
1011global0.65000.95000.3000
1111paired0.80000.90000.1000
1212global0.75000.95000.2000
1312paired0.65000.70000.0500
\n", + "
" + ], + "text/plain": [ + "model participant task ProtoECGNet ProtoSSL difference\n", + "0 5 global 0.9000 1.0000 0.1000\n", + "1 5 paired 0.9000 1.0000 0.1000\n", + "2 7 global 0.7000 0.8000 0.1000\n", + "3 7 paired 0.6000 0.8000 0.2000\n", + "4 8 global 0.4500 0.8000 0.3500\n", + "5 8 paired 0.5000 0.5500 0.0500\n", + "6 9 global 0.6000 0.9500 0.3500\n", + "7 9 paired 0.5500 0.9000 0.3500\n", + "8 10 global 0.6000 0.9500 0.3500\n", + "9 10 paired 0.7500 0.9500 0.2000\n", + "10 11 global 0.6500 0.9500 0.3000\n", + "11 11 paired 0.8000 0.9000 0.1000\n", + "12 12 global 0.7500 0.9500 0.2000\n", + "13 12 paired 0.6500 0.7000 0.0500" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "participant_summary = (\n", + " yesno.groupby([\"participant\", \"task\", \"model\"])[\"good\"]\n", + " .mean()\n", + " .unstack(\"model\")\n", + " .reset_index()\n", + ")\n", + "\n", + "participant_summary[\"difference\"] = (\n", + " participant_summary[\"ProtoSSL\"] - participant_summary[\"ProtoECGNet\"]\n", + ")\n", + "\n", + "participant_summary\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b6ff4f59", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
taskn_participantsProtoSSL_meanProtoECGNet_meanmean_differenceci95_lowci95_highwilcoxon_Wwilcoxon_ppaired_t_psign_test_p
0global70.91430.66430.25000.14320.35680.00000.01560.00120.0156
1paired70.82860.67860.15000.05010.24990.00000.01560.01040.0156
\n", + "
" + ], + "text/plain": [ + " task n_participants ProtoSSL_mean ProtoECGNet_mean mean_difference ci95_low ci95_high wilcoxon_W wilcoxon_p paired_t_p sign_test_p\n", + "0 global 7 0.9143 0.6643 0.2500 0.1432 0.3568 0.0000 0.0156 0.0012 0.0156\n", + "1 paired 7 0.8286 0.6786 0.1500 0.0501 0.2499 0.0000 0.0156 0.0104 0.0156" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\n", + "def participant_level_analysis(df):\n", + " rows = []\n", + " raw_wilcoxon_p = []\n", + "\n", + " for task in [\"global\", \"paired\"]:\n", + " sub = df[df[\"task\"] == task].copy()\n", + " diffs = sub[\"difference\"].to_numpy()\n", + "\n", + " w = wilcoxon(diffs, alternative=\"two-sided\", zero_method=\"wilcox\", method=\"exact\")\n", + " ttest = ttest_rel(sub[\"ProtoSSL\"], sub[\"ProtoECGNet\"])\n", + " sign = binomtest(np.sum(diffs > 0), np.sum(diffs != 0), p=0.5, alternative=\"two-sided\")\n", + "\n", + " mean_diff = float(np.mean(diffs))\n", + " sd_diff = float(np.std(diffs, ddof=1))\n", + " se_diff = sd_diff / math.sqrt(len(diffs))\n", + " tcrit = tdist.ppf(0.975, df=len(diffs) - 1)\n", + " ci_low = mean_diff - tcrit * se_diff\n", + " ci_high = mean_diff + tcrit * se_diff\n", + "\n", + " rows.append(\n", + " {\n", + " \"task\": task,\n", + " \"n_participants\": len(sub),\n", + " \"ProtoSSL_mean\": sub[\"ProtoSSL\"].mean(),\n", + " \"ProtoECGNet_mean\": sub[\"ProtoECGNet\"].mean(),\n", + " \"mean_difference\": mean_diff,\n", + " \"ci95_low\": ci_low,\n", + " \"ci95_high\": ci_high,\n", + " \"wilcoxon_W\": float(w.statistic),\n", + " \"wilcoxon_p\": float(w.pvalue),\n", + " \"paired_t_p\": float(ttest.pvalue),\n", + " \"sign_test_p\": float(sign.pvalue),\n", + " }\n", + " )\n", + " raw_wilcoxon_p.append(float(w.pvalue))\n", + "\n", + " out = pd.DataFrame(rows)\n", + " return out\n", + "\n", + "primary_results = participant_level_analysis(participant_summary)\n", + "primary_results\n" + ] + }, + { + "cell_type": "markdown", + "id": "2c182260", + "metadata": {}, + "source": [ + "## Per-label participant-level summaries\n", + "\n", + "These are useful to show **where** the overall pattern comes from, but I recommend keeping them **descriptive only** in the paper because each label has only 5 cases.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "53b57b45", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tasklabelProtoSSL_meanProtoECGNet_meanmean_difference
0globalAMI0.85710.65710.2000
1globalCLBBB0.91430.9429-0.0286
2globalCRBBB0.91430.54290.3714
3globalPVC0.97140.51430.4571
4pairedAMI0.80000.65710.1429
5pairedCLBBB0.77140.9429-0.1714
6pairedCRBBB0.85710.54290.3143
7pairedPVC0.88570.57140.3143
\n", + "
" + ], + "text/plain": [ + " task label ProtoSSL_mean ProtoECGNet_mean mean_difference\n", + "0 global AMI 0.8571 0.6571 0.2000\n", + "1 global CLBBB 0.9143 0.9429 -0.0286\n", + "2 global CRBBB 0.9143 0.5429 0.3714\n", + "3 global PVC 0.9714 0.5143 0.4571\n", + "4 paired AMI 0.8000 0.6571 0.1429\n", + "5 paired CLBBB 0.7714 0.9429 -0.1714\n", + "6 paired CRBBB 0.8571 0.5429 0.3143\n", + "7 paired PVC 0.8857 0.5714 0.3143" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "participant_by_label = (\n", + " yesno.groupby([\"participant\", \"task\", \"label\", \"model\"])[\"good\"]\n", + " .mean()\n", + " .unstack(\"model\")\n", + " .reset_index()\n", + ")\n", + "participant_by_label[\"difference\"] = (\n", + " participant_by_label[\"ProtoSSL\"] - participant_by_label[\"ProtoECGNet\"]\n", + ")\n", + "\n", + "per_label_summary = (\n", + " participant_by_label.groupby([\"task\", \"label\"])\n", + " .agg(\n", + " ProtoSSL_mean=(\"ProtoSSL\", \"mean\"),\n", + " ProtoECGNet_mean=(\"ProtoECGNet\", \"mean\"),\n", + " mean_difference=(\"difference\", \"mean\"),\n", + " )\n", + " .reset_index()\n", + ")\n", + "per_label_summary\n" + ] + }, + { + "cell_type": "markdown", + "id": "e1dd8d16", + "metadata": {}, + "source": [ + "### Descriptive summaries for the comparative A/B/Both/Neither question" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "06bb6c2f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
preferencetaskBothNeitherProtoECGNetProtoSSL
0global5102564
1paired3463664
\n", + "
" + ], + "text/plain": [ + "preference task Both Neither ProtoECGNet ProtoSSL\n", + "0 global 51 0 25 64\n", + "1 paired 34 6 36 64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "preference_overall = (\n", + " prefs.groupby([\"task\", \"preference\"])\n", + " .size()\n", + " .unstack(fill_value=0)\n", + " .reset_index()\n", + ")\n", + "preference_overall\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a324d62e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
preferencetasklabelBothNeitherProtoECGNetProtoSSL
0globalAMI160514
1globalCLBBB170108
2globalCRBBB80522
3globalPVC100520
4pairedAMI83915
5pairedCLBBB122174
6pairedCRBBB80522
7pairedPVC61523
\n", + "
" + ], + "text/plain": [ + "preference task label Both Neither ProtoECGNet ProtoSSL\n", + "0 global AMI 16 0 5 14\n", + "1 global CLBBB 17 0 10 8\n", + "2 global CRBBB 8 0 5 22\n", + "3 global PVC 10 0 5 20\n", + "4 paired AMI 8 3 9 15\n", + "5 paired CLBBB 12 2 17 4\n", + "6 paired CRBBB 8 0 5 22\n", + "7 paired PVC 6 1 5 23" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "preference_by_label = (\n", + " prefs.groupby([\"task\", \"label\", \"preference\"])\n", + " .size()\n", + " .unstack(fill_value=0)\n", + " .reset_index()\n", + ")\n", + "preference_by_label\n" + ] + }, + { + "cell_type": "markdown", + "id": "0aba0956", + "metadata": {}, + "source": [ + "### Fleiss' kappa for the binary yes/no ratings\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "0d869989", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overall Fleiss' kappa: 0.2877\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/9j/f0qlzhxj2klgf3bxm77sqz300000gn/T/ipykernel_25474/3937073973.py:14: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n", + " .apply(fleiss_from_yesno)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
labelfleiss_kappa
0AMI0.2023
1CLBBB0.0542
2CRBBB0.3000
3PVC0.4000
\n", + "
" + ], + "text/plain": [ + " label fleiss_kappa\n", + "0 AMI 0.2023\n", + "1 CLBBB 0.0542\n", + "2 CRBBB 0.3000\n", + "3 PVC 0.4000" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def fleiss_from_yesno(df):\n", + " table = []\n", + " for _, g in df.groupby([\"case_id\", \"task\", \"model\"]):\n", + " counts = g[\"good\"].value_counts().reindex([0, 1], fill_value=0)\n", + " table.append(counts.values)\n", + "\n", + " table = np.asarray(table)\n", + " return float(fleiss_kappa(table))\n", + "\n", + "overall_kappa = fleiss_from_yesno(yesno)\n", + "\n", + "kappa_by_label = (\n", + " yesno.groupby(\"label\", group_keys=False)\n", + " .apply(fleiss_from_yesno)\n", + " .rename(\"fleiss_kappa\")\n", + " .reset_index()\n", + ")\n", + "\n", + "print(\"Overall Fleiss' kappa:\", round(overall_kappa, 4))\n", + "kappa_by_label\n" + ] + }, + { + "cell_type": "markdown", + "id": "80bfa9be", + "metadata": {}, + "source": [ + "### Compact summary tables" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "64519600", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
taskn_participantsProtoSSL_meanProtoECGNet_meanmean_differenceci95_lowci95_highwilcoxon_Wwilcoxon_ppaired_t_psign_test_p
0global70.91430.66430.25000.14320.35680.00000.01560.00120.0156
1paired70.82860.67860.15000.05010.24990.00000.01560.01040.0156
\n", + "
" + ], + "text/plain": [ + " task n_participants ProtoSSL_mean ProtoECGNet_mean mean_difference ci95_low ci95_high wilcoxon_W wilcoxon_p paired_t_p sign_test_p\n", + "0 global 7 0.9143 0.6643 0.2500 0.1432 0.3568 0.0000 0.0156 0.0012 0.0156\n", + "1 paired 7 0.8286 0.6786 0.1500 0.0501 0.2499 0.0000 0.0156 0.0104 0.0156" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "primary_results_rounded = primary_results.copy()\n", + "for col in [\"ProtoSSL_mean\", \"ProtoECGNet_mean\", \"mean_difference\", \"ci95_low\", \"ci95_high\", \"wilcoxon_p\", \"paired_t_p\", \"sign_test_p\"]:\n", + " primary_results_rounded[col] = primary_results_rounded[col].round(4)\n", + "primary_results_rounded\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "67c346fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
modeltasklabelProtoECGNetProtoSSL
0globalAMI23/35 (65.7%)30/35 (85.7%)
1globalCLBBB33/35 (94.3%)32/35 (91.4%)
2globalCRBBB19/35 (54.3%)32/35 (91.4%)
3globalPVC18/35 (51.4%)34/35 (97.1%)
4pairedAMI23/35 (65.7%)28/35 (80.0%)
5pairedCLBBB33/35 (94.3%)27/35 (77.1%)
6pairedCRBBB19/35 (54.3%)30/35 (85.7%)
7pairedPVC20/35 (57.1%)31/35 (88.6%)
\n", + "
" + ], + "text/plain": [ + "model task label ProtoECGNet ProtoSSL\n", + "0 global AMI 23/35 (65.7%) 30/35 (85.7%)\n", + "1 global CLBBB 33/35 (94.3%) 32/35 (91.4%)\n", + "2 global CRBBB 19/35 (54.3%) 32/35 (91.4%)\n", + "3 global PVC 18/35 (51.4%) 34/35 (97.1%)\n", + "4 paired AMI 23/35 (65.7%) 28/35 (80.0%)\n", + "5 paired CLBBB 33/35 (94.3%) 27/35 (77.1%)\n", + "6 paired CRBBB 19/35 (54.3%) 30/35 (85.7%)\n", + "7 paired PVC 20/35 (57.1%) 31/35 (88.6%)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "label_yesno_pivot = label_yesno.copy()\n", + "label_yesno_pivot[\"summary\"] = (\n", + " label_yesno_pivot[\"n_yes\"].astype(str)\n", + " + \"/\"\n", + " + label_yesno_pivot[\"n_total\"].astype(str)\n", + " + \" (\"\n", + " + (100 * label_yesno_pivot[\"proportion\"]).round(1).astype(str)\n", + " + \"%)\"\n", + ")\n", + "label_yesno_pivot = (\n", + " label_yesno_pivot[[\"task\", \"label\", \"model\", \"summary\"]]\n", + " .pivot(index=[\"task\", \"label\"], columns=\"model\", values=\"summary\")\n", + " .reset_index()\n", + ")\n", + "label_yesno_pivot\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51807e2d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "900aa255", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ecg_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}