Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions aeo_updates/temperature_gas_price_regression/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Python cache
__pycache__/
*.pyc

# Notebook checkpoints
.ipynb_checkpoints/

# Outputs and intermediate inputs (reproducible)
outputs/
inputs/gasreg_regression_data.csv
inputs/state_cdd*.txt
inputs/state_hdd*.txt
inputs/cendiv_cdd*.txt
inputs/cendiv_hdd*.txt
17 changes: 17 additions & 0 deletions aeo_updates/temperature_gas_price_regression/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Workflow
To reproduce the temperature - gas price regression inputs used in ReEDS, the following notebooks should be run in the order listed here:
- `write_gasreg_regression_data.ipynb` writes the daily heating/cooling degree day and spot price data used in the regression. Creates the intermediate input file `inputs/gasreg_regression_data.csv`.
- `calculate_regression_params.ipynb` performs the regression and exports the corresponding parameters (HDD/CDD coefficients, intercept, and monthly fixed effects). Creates the output (ReEDS input) file `outputs/gasreg_degree_day_price_mult_regression_params.csv`.
- `calculate_gasreg_degree_days.ipynb` calculates and exports annual (2010-2050) HDD/CDD projections for each gasreg (regions used in the regression). Creates the output (ReEDS input) file `outputs/gasreg_degree_days.csv`.

Optionally, the `inspect_hub_locations.ipynb` can also be used to identify the gasreg that each natural gas hub overlaps with geographically. This notebook is not required to reproduce the ReEDS inputs, but was used initially to associate gasregs with daily gas prices and is here for documentation purposes. Note that the file containing the hub locations used in this notebook is not accessible to people external to the lab.

# Outputs
The two outputs generated by these notebooks are `outputs/gasreg_degree_day_price_mult_regression_params.csv` and `outputs/gasreg_degree_days.csv`. To be used in ReEDS, both should be copied to the `inputs/fuelprices` folder in the ReEDS directory (with the file names unchanged).


# Sources
- `inputs/kdegday.txt`: [AEO 2026](https://github.com/EIAgov/NEMS/blob/main/input/bld/kdegday.txt)
- `inputs/NationalProjections_ProjectedTotalPopulation_2030-2050.csv`: [University of Virginia Weldon Cooper Center for Public Service](https://www.coopercenter.org/national-population-projections)
- `inputs/nst-est2020.xlsx`: [U.S. Census Bureau](https://www.census.gov/programs-surveys/popest/technical-documentation/research/evaluation-estimates/2020-evaluation-estimates/2010s-state-total.html)
- `inputs/NST-EST2025-POP.xlsx`: [U.S. Census Bureau](https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-total.html)
Original file line number Diff line number Diff line change
@@ -0,0 +1,351 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "43fc8779-04b0-47e9-b93a-39c8cee28a27",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.linear_model import LinearRegression\n",
"from pathlib import Path\n",
"import requests\n",
"import urllib3\n",
"from urllib3.exceptions import InsecureRequestWarning\n",
"import warnings\n",
"\n",
"urllib3.disable_warnings(InsecureRequestWarning)\n",
"warnings.simplefilter(action='ignore', category=pd.errors.ParserWarning)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "dec32a79-44fa-406e-91de-585cd375941a",
"metadata": {},
"outputs": [],
"source": [
"month_list = [\n",
" 'January',\n",
" 'February',\n",
" 'March',\n",
" 'April',\n",
" 'May',\n",
" 'June',\n",
" 'July',\n",
" 'August',\n",
" 'September',\n",
" 'October',\n",
" 'November',\n",
" 'December'\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b1e9fba8-72fe-4a40-ac97-2f8ff969eed5",
"metadata": {},
"outputs": [],
"source": [
"# Download state-level degree days\n",
"for dd_type in ['cdd', 'hdd']:\n",
" for year in range(1995, 2026):\n",
" for month in month_list:\n",
" if dd_type == 'cdd':\n",
" energy_type = 'cooling'\n",
" else:\n",
" energy_type = 'heating'\n",
"\n",
" url = f\"https://ftp.cpc.ncep.noaa.gov/htdocs/degree_days/weighted/legacy_files/{energy_type}/statesCONUS/{year}/{month}.txt\"\n",
" fpath = Path('inputs', f\"state_{dd_type}_{month}_{year}.txt\")\n",
" \n",
" response = requests.get(url, verify=False)\n",
" if response.status_code == 200:\n",
" with open(fpath, \"w\", encoding=\"utf-8\") as f:\n",
" f.write(response.text)\n",
" else:\n",
" print(f\"Error: {response.status_code}\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a672e38a-52ff-43da-8a56-4ccc0d39ce6e",
"metadata": {},
"outputs": [],
"source": [
"# Get annual HDD/CDDs for each state and each year from 1995 to 2025\n",
"cdd_dict = {}\n",
"for year in range(1995, 2026):\n",
" cdd_data_for_year = []\n",
" for month in month_list:\n",
" df = (\n",
" pd.read_csv(\n",
" Path('inputs', f'state_cdd_{month}_{year}.txt'),\n",
" skiprows=14,\n",
" sep='\\s{2,}| -',\n",
" header=None\n",
" )\n",
" .set_index(0)\n",
" [1]\n",
" )\n",
" cdd_data_for_year.append(df)\n",
" cdd_dict[year] = pd.concat(cdd_data_for_year, axis=1).sum(axis=1)\n",
"\n",
"hdd_dict = {}\n",
"for year in range(1995, 2026):\n",
" hdd_data_for_year = []\n",
" for month in month_list:\n",
" df = (\n",
" pd.read_csv(\n",
" Path('inputs', f'state_hdd_{month}_{year}.txt'),\n",
" skiprows=14,\n",
" sep='\\s{2,}| -',\n",
" header=None\n",
" )\n",
" .set_index(0)\n",
" [1]\n",
" )\n",
" hdd_data_for_year.append(df)\n",
" hdd_dict[year] = pd.concat(hdd_data_for_year, axis=1).sum(axis=1)\n",
"\n",
"state_cdd = pd.concat(cdd_dict, axis=1).rename_axis(index='state').iloc[:48].transpose()\n",
"state_hdd = pd.concat(hdd_dict, axis=1).rename_axis(index='state').iloc[:48].transpose()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "b97c002e-6b96-40ab-ace1-e656a109b944",
"metadata": {},
"outputs": [],
"source": [
"# Use 30-year linear trend from 1995 to 2024 to estimate HDD/CDDs for 2026-2050\n",
"all_state_cdds = {}\n",
"X = np.array(state_cdd.loc[:2024].index).reshape(-1, 1)\n",
"for state in state_cdd.columns:\n",
" y = np.array(state_cdd.loc[:2024][state])\n",
" reg = LinearRegression().fit(X, y)\n",
" state_cdd_projected = pd.Series(\n",
" np.arange(2026, 2051) * reg.coef_ + reg.intercept_,\n",
" index = np.arange(2026, 2051)\n",
" )\n",
" all_state_cdds[state] = pd.concat([state_cdd[state], state_cdd_projected])\n",
"\n",
"all_state_hdds = {}\n",
"X = np.array(state_hdd.loc[:2024].index).reshape(-1, 1)\n",
"for state in state_hdd.columns:\n",
" y = np.array(state_hdd.loc[:2024][state])\n",
" reg = LinearRegression().fit(X, y)\n",
" state_hdd_projected = pd.Series(\n",
" np.arange(2026, 2051) * reg.coef_ + reg.intercept_,\n",
" index = np.arange(2026, 2051)\n",
" )\n",
" all_state_hdds[state] = pd.concat([state_hdd[state], state_hdd_projected])\n",
"\n",
"state_cdd = pd.concat(all_state_cdds, axis=1)\n",
"state_cdd.columns = state_cdd.columns.str.title()\n",
"\n",
"state_hdd = pd.concat(all_state_hdds, axis=1)\n",
"state_hdd.columns = state_hdd.columns.str.title()\n",
"\n",
"state_cdd = state_cdd.loc[2010:]\n",
"state_hdd = state_hdd.loc[2010:]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "bfcb343c-a3f4-4c04-a2ab-c6d4d68f348c",
"metadata": {},
"outputs": [],
"source": [
"# Get historical (pre-2025) populations and projections for 2030, 2040, and 2050\n",
"# Fill in-between years via linear interpolation\n",
"def read_historical_state_populations(fpath, year_range):\n",
" hist_population = pd.read_excel(fpath, skiprows=3)\n",
" hist_population = (\n",
" hist_population.rename(columns={'Unnamed: 0': 'state'})\n",
" .set_index('state')\n",
" [year_range]\n",
" .dropna(subset=year_range[0])\n",
" .iloc[5:]\n",
" )\n",
" hist_population.index = hist_population.index.str.replace('.', '')\n",
"\n",
" return hist_population\n",
"\n",
"population = pd.read_csv(\n",
" Path('inputs', 'NationalProjections_ProjectedTotalPopulation_2030-2050.csv')\n",
")\n",
"population = (\n",
" population.drop(columns='FIPS')\n",
" .rename(columns={'Geography Name': 'state'})\n",
" .set_index('state')\n",
" .drop('United States')\n",
" .replace(',', '', regex=True)\n",
" .astype(int)\n",
")\n",
"population.columns = population.columns.astype(int)\n",
"\n",
"hist_populations_2010s = read_historical_state_populations(\n",
" Path('inputs', 'nst-est2020.xlsx'),\n",
" range(2010, 2020)\n",
")\n",
"hist_populations_2020s = read_historical_state_populations(\n",
" Path('inputs', 'NST-EST2025-POP.xlsx'),\n",
" range(2020, 2026)\n",
")\n",
"\n",
"population = (\n",
" pd.concat(\n",
" [hist_populations_2010s, hist_populations_2020s, population.drop(columns=2020)],\n",
" axis=1\n",
" )\n",
" .dropna(subset=2050)\n",
" .reindex(columns=range(2010, 2051))\n",
" .interpolate(axis=1)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "e7fa7475-bdc0-4637-bce2-acfefa7e8c6e",
"metadata": {},
"outputs": [],
"source": [
"# Calculate annual HDD/CDDs for 2010-2050 for non-cendiv gasregs\n",
"# by aggregating state-level HDD/CDDs via population-weighted average\n",
"gasreg_state_map = {\n",
" 'California': ['California'],\n",
" 'Northwest': ['Oregon', 'Washington'],\n",
" 'Southwest': ['Arizona', 'New Mexico'],\n",
" 'Mountain': ['Montana', 'Idaho', 'Wyoming', 'Nevada', 'Utah', 'Colorado']\n",
"}\n",
"gasreg_cdds = {}\n",
"gasreg_hdds = {}\n",
"\n",
"for gasreg, states in gasreg_state_map.items():\n",
" state_weighted_cdds = []\n",
" state_weighted_hdds = []\n",
"\n",
" gasreg_population = population.loc[states].sum()\n",
" for state in states:\n",
" weight = population.loc[state] / gasreg_population\n",
" weighted_cdd = state_cdd[state] * weight\n",
" weighted_hdd = state_hdd[state] * weight\n",
"\n",
" state_weighted_cdds.append(weighted_cdd)\n",
" state_weighted_hdds.append(weighted_hdd)\n",
"\n",
" gasreg_cdds[gasreg] = pd.concat(state_weighted_cdds, axis=1).sum(axis=1)\n",
" gasreg_hdds[gasreg] = pd.concat(state_weighted_hdds, axis=1).sum(axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "ae1a27af-ba14-48fb-868a-20e0831bb444",
"metadata": {},
"outputs": [],
"source": [
"# Read cendiv-level degree days and combine with non-cendiv gasreg degree days\n",
"cendiv_number_name_map = {\n",
" '1': 'New England',\n",
" '2': 'Mid-Atlantic',\n",
" '3': 'East North Central',\n",
" '4': 'West North Central',\n",
" '5': 'South Atlantic',\n",
" '6': 'East South Central',\n",
" '7': 'West South Central',\n",
" '8': 'Mountain',\n",
" '9': 'Pacific'\n",
"}\n",
"annual_degree_days = pd.read_csv(\n",
" Path('inputs', 'kdegday.txt'),\n",
" sep='\\s+'\n",
")\n",
"\n",
"annual_degree_days['DD_type'] = 'CDD'\n",
"annual_degree_days.iloc[::2, annual_degree_days.columns.get_loc('DD_type')] = 'HDD'\n",
"\n",
"annual_hdd = (\n",
" annual_degree_days.loc[annual_degree_days.DD_type == 'HDD']\n",
" .reset_index(drop=True)\n",
" .drop(columns='DD_type')\n",
" .set_index('Year')\n",
" .loc[2010:]\n",
")\n",
"annual_hdd.columns = annual_hdd.columns.map(cendiv_number_name_map)\n",
"annual_hdd = annual_hdd.drop(columns=['Pacific', 'Mountain'])\n",
"for gasreg, hdd in gasreg_hdds.items():\n",
" annual_hdd[gasreg] = hdd.round().astype(int)\n",
"\n",
"annual_cdd = (\n",
" annual_degree_days.loc[annual_degree_days.DD_type == 'CDD']\n",
" .reset_index(drop=True)\n",
" .drop(columns='DD_type')\n",
" .set_index('Year')\n",
" .loc[2010:]\n",
")\n",
"annual_cdd.columns = annual_cdd.columns.map(cendiv_number_name_map)\n",
"annual_cdd = annual_cdd.drop(columns=['Pacific', 'Mountain'])\n",
"for gasreg, cdd in gasreg_cdds.items():\n",
" annual_cdd[gasreg] = cdd.round().astype(int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "84da335c-9e4b-40bc-8fb0-f9667402a50f",
"metadata": {},
"outputs": [],
"source": [
"# Reformat and export\n",
"df_hdd = annual_hdd.copy()\n",
"df_hdd.columns = [col.replace('-', '_').replace(' ', '_') for col in df_hdd.columns]\n",
"df_hdd = df_hdd.reset_index().rename(columns={'Year': 't'}).assign(ddtype='HDD')\n",
"\n",
"df_cdd = annual_cdd.copy()\n",
"df_cdd.columns = [col.replace('-', '_').replace(' ', '_') for col in df_cdd.columns]\n",
"df_cdd = df_cdd.reset_index().rename(columns={'Year': 't'}).assign(ddtype='CDD')\n",
"\n",
"df_out = (\n",
" pd.concat([df_cdd, df_hdd])\n",
" .set_index(['t', 'ddtype'])\n",
" .sort_index()\n",
" .sort_index(axis=1)\n",
")\n",
"\n",
"outpath = Path('outputs', 'gasreg_degree_days.csv')\n",
"outpath.parent.mkdir(parents=True, exist_ok=True)\n",
"df_out.to_csv(outpath)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading