diff --git a/aeo_updates/temperature_gas_price_regression/.gitignore b/aeo_updates/temperature_gas_price_regression/.gitignore new file mode 100644 index 0000000..72f049b --- /dev/null +++ b/aeo_updates/temperature_gas_price_regression/.gitignore @@ -0,0 +1,14 @@ +# Python cache +__pycache__/ +*.pyc + +# Notebook checkpoints +.ipynb_checkpoints/ + +# Outputs and intermediate inputs (reproducible) +outputs/ +inputs/gasreg_regression_data.csv +inputs/state_cdd*.txt +inputs/state_hdd*.txt +inputs/cendiv_cdd*.txt +inputs/cendiv_hdd*.txt \ No newline at end of file diff --git a/aeo_updates/temperature_gas_price_regression/README.md b/aeo_updates/temperature_gas_price_regression/README.md new file mode 100644 index 0000000..072283a --- /dev/null +++ b/aeo_updates/temperature_gas_price_regression/README.md @@ -0,0 +1,17 @@ +# Workflow +To reproduce the temperature - gas price regression inputs used in ReEDS, the following notebooks should be run in the order listed here: +- `write_gasreg_regression_data.ipynb` writes the daily heating/cooling degree day and spot price data used in the regression. Creates the intermediate input file `inputs/gasreg_regression_data.csv`. +- `calculate_regression_params.ipynb` performs the regression and exports the corresponding parameters (HDD/CDD coefficients, intercept, and monthly fixed effects). Creates the output (ReEDS input) file `outputs/gasreg_degree_day_price_mult_regression_params.csv`. +- `calculate_gasreg_degree_days.ipynb` calculates and exports annual (2010-2050) HDD/CDD projections for each gasreg (regions used in the regression). Creates the output (ReEDS input) file `outputs/gasreg_degree_days.csv`. + +Optionally, the `inspect_hub_locations.ipynb` can also be used to identify the gasreg that each natural gas hub overlaps with geographically. This notebook is not required to reproduce the ReEDS inputs, but was used initially to associate gasregs with daily gas prices and is here for documentation purposes. Note that the file containing the hub locations used in this notebook is not accessible to people external to the lab. + +# Outputs +The two outputs generated by these notebooks are `outputs/gasreg_degree_day_price_mult_regression_params.csv` and `outputs/gasreg_degree_days.csv`. To be used in ReEDS, both should be copied to the `inputs/fuelprices` folder in the ReEDS directory (with the file names unchanged). + + +# Sources +- `inputs/kdegday.txt`: [AEO 2026](https://github.com/EIAgov/NEMS/blob/main/input/bld/kdegday.txt) +- `inputs/NationalProjections_ProjectedTotalPopulation_2030-2050.csv`: [University of Virginia Weldon Cooper Center for Public Service](https://www.coopercenter.org/national-population-projections) +- `inputs/nst-est2020.xlsx`: [U.S. Census Bureau](https://www.census.gov/programs-surveys/popest/technical-documentation/research/evaluation-estimates/2020-evaluation-estimates/2010s-state-total.html) +- `inputs/NST-EST2025-POP.xlsx`: [U.S. Census Bureau](https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-total.html) diff --git a/aeo_updates/temperature_gas_price_regression/calculate_gasreg_degree_days.ipynb b/aeo_updates/temperature_gas_price_regression/calculate_gasreg_degree_days.ipynb new file mode 100644 index 0000000..e519680 --- /dev/null +++ b/aeo_updates/temperature_gas_price_regression/calculate_gasreg_degree_days.ipynb @@ -0,0 +1,351 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "43fc8779-04b0-47e9-b93a-39c8cee28a27", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.linear_model import LinearRegression\n", + "from pathlib import Path\n", + "import requests\n", + "import urllib3\n", + "from urllib3.exceptions import InsecureRequestWarning\n", + "import warnings\n", + "\n", + "urllib3.disable_warnings(InsecureRequestWarning)\n", + "warnings.simplefilter(action='ignore', category=pd.errors.ParserWarning)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "dec32a79-44fa-406e-91de-585cd375941a", + "metadata": {}, + "outputs": [], + "source": [ + "month_list = [\n", + " 'January',\n", + " 'February',\n", + " 'March',\n", + " 'April',\n", + " 'May',\n", + " 'June',\n", + " 'July',\n", + " 'August',\n", + " 'September',\n", + " 'October',\n", + " 'November',\n", + " 'December'\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b1e9fba8-72fe-4a40-ac97-2f8ff969eed5", + "metadata": {}, + "outputs": [], + "source": [ + "# Download state-level degree days\n", + "for dd_type in ['cdd', 'hdd']:\n", + " for year in range(1995, 2026):\n", + " for month in month_list:\n", + " if dd_type == 'cdd':\n", + " energy_type = 'cooling'\n", + " else:\n", + " energy_type = 'heating'\n", + "\n", + " url = f\"https://ftp.cpc.ncep.noaa.gov/htdocs/degree_days/weighted/legacy_files/{energy_type}/statesCONUS/{year}/{month}.txt\"\n", + " fpath = Path('inputs', f\"state_{dd_type}_{month}_{year}.txt\")\n", + " \n", + " response = requests.get(url, verify=False)\n", + " if response.status_code == 200:\n", + " with open(fpath, \"w\", encoding=\"utf-8\") as f:\n", + " f.write(response.text)\n", + " else:\n", + " print(f\"Error: {response.status_code}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a672e38a-52ff-43da-8a56-4ccc0d39ce6e", + "metadata": {}, + "outputs": [], + "source": [ + "# Get annual HDD/CDDs for each state and each year from 1995 to 2025\n", + "cdd_dict = {}\n", + "for year in range(1995, 2026):\n", + " cdd_data_for_year = []\n", + " for month in month_list:\n", + " df = (\n", + " pd.read_csv(\n", + " Path('inputs', f'state_cdd_{month}_{year}.txt'),\n", + " skiprows=14,\n", + " sep='\\s{2,}| -',\n", + " header=None\n", + " )\n", + " .set_index(0)\n", + " [1]\n", + " )\n", + " cdd_data_for_year.append(df)\n", + " cdd_dict[year] = pd.concat(cdd_data_for_year, axis=1).sum(axis=1)\n", + "\n", + "hdd_dict = {}\n", + "for year in range(1995, 2026):\n", + " hdd_data_for_year = []\n", + " for month in month_list:\n", + " df = (\n", + " pd.read_csv(\n", + " Path('inputs', f'state_hdd_{month}_{year}.txt'),\n", + " skiprows=14,\n", + " sep='\\s{2,}| -',\n", + " header=None\n", + " )\n", + " .set_index(0)\n", + " [1]\n", + " )\n", + " hdd_data_for_year.append(df)\n", + " hdd_dict[year] = pd.concat(hdd_data_for_year, axis=1).sum(axis=1)\n", + "\n", + "state_cdd = pd.concat(cdd_dict, axis=1).rename_axis(index='state').iloc[:48].transpose()\n", + "state_hdd = pd.concat(hdd_dict, axis=1).rename_axis(index='state').iloc[:48].transpose()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b97c002e-6b96-40ab-ace1-e656a109b944", + "metadata": {}, + "outputs": [], + "source": [ + "# Use 30-year linear trend from 1995 to 2024 to estimate HDD/CDDs for 2026-2050\n", + "all_state_cdds = {}\n", + "X = np.array(state_cdd.loc[:2024].index).reshape(-1, 1)\n", + "for state in state_cdd.columns:\n", + " y = np.array(state_cdd.loc[:2024][state])\n", + " reg = LinearRegression().fit(X, y)\n", + " state_cdd_projected = pd.Series(\n", + " np.arange(2026, 2051) * reg.coef_ + reg.intercept_,\n", + " index = np.arange(2026, 2051)\n", + " )\n", + " all_state_cdds[state] = pd.concat([state_cdd[state], state_cdd_projected])\n", + "\n", + "all_state_hdds = {}\n", + "X = np.array(state_hdd.loc[:2024].index).reshape(-1, 1)\n", + "for state in state_hdd.columns:\n", + " y = np.array(state_hdd.loc[:2024][state])\n", + " reg = LinearRegression().fit(X, y)\n", + " state_hdd_projected = pd.Series(\n", + " np.arange(2026, 2051) * reg.coef_ + reg.intercept_,\n", + " index = np.arange(2026, 2051)\n", + " )\n", + " all_state_hdds[state] = pd.concat([state_hdd[state], state_hdd_projected])\n", + "\n", + "state_cdd = pd.concat(all_state_cdds, axis=1)\n", + "state_cdd.columns = state_cdd.columns.str.title()\n", + "\n", + "state_hdd = pd.concat(all_state_hdds, axis=1)\n", + "state_hdd.columns = state_hdd.columns.str.title()\n", + "\n", + "state_cdd = state_cdd.loc[2010:]\n", + "state_hdd = state_hdd.loc[2010:]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "bfcb343c-a3f4-4c04-a2ab-c6d4d68f348c", + "metadata": {}, + "outputs": [], + "source": [ + "# Get historical (pre-2025) populations and projections for 2030, 2040, and 2050\n", + "# Fill in-between years via linear interpolation\n", + "def read_historical_state_populations(fpath, year_range):\n", + " hist_population = pd.read_excel(fpath, skiprows=3)\n", + " hist_population = (\n", + " hist_population.rename(columns={'Unnamed: 0': 'state'})\n", + " .set_index('state')\n", + " [year_range]\n", + " .dropna(subset=year_range[0])\n", + " .iloc[5:]\n", + " )\n", + " hist_population.index = hist_population.index.str.replace('.', '')\n", + "\n", + " return hist_population\n", + "\n", + "population = pd.read_csv(\n", + " Path('inputs', 'NationalProjections_ProjectedTotalPopulation_2030-2050.csv')\n", + ")\n", + "population = (\n", + " population.drop(columns='FIPS')\n", + " .rename(columns={'Geography Name': 'state'})\n", + " .set_index('state')\n", + " .drop('United States')\n", + " .replace(',', '', regex=True)\n", + " .astype(int)\n", + ")\n", + "population.columns = population.columns.astype(int)\n", + "\n", + "hist_populations_2010s = read_historical_state_populations(\n", + " Path('inputs', 'nst-est2020.xlsx'),\n", + " range(2010, 2020)\n", + ")\n", + "hist_populations_2020s = read_historical_state_populations(\n", + " Path('inputs', 'NST-EST2025-POP.xlsx'),\n", + " range(2020, 2026)\n", + ")\n", + "\n", + "population = (\n", + " pd.concat(\n", + " [hist_populations_2010s, hist_populations_2020s, population.drop(columns=2020)],\n", + " axis=1\n", + " )\n", + " .dropna(subset=2050)\n", + " .reindex(columns=range(2010, 2051))\n", + " .interpolate(axis=1)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e7fa7475-bdc0-4637-bce2-acfefa7e8c6e", + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate annual HDD/CDDs for 2010-2050 for non-cendiv gasregs\n", + "# by aggregating state-level HDD/CDDs via population-weighted average\n", + "gasreg_state_map = {\n", + " 'California': ['California'],\n", + " 'Northwest': ['Oregon', 'Washington'],\n", + " 'Southwest': ['Arizona', 'New Mexico'],\n", + " 'Mountain': ['Montana', 'Idaho', 'Wyoming', 'Nevada', 'Utah', 'Colorado']\n", + "}\n", + "gasreg_cdds = {}\n", + "gasreg_hdds = {}\n", + "\n", + "for gasreg, states in gasreg_state_map.items():\n", + " state_weighted_cdds = []\n", + " state_weighted_hdds = []\n", + "\n", + " gasreg_population = population.loc[states].sum()\n", + " for state in states:\n", + " weight = population.loc[state] / gasreg_population\n", + " weighted_cdd = state_cdd[state] * weight\n", + " weighted_hdd = state_hdd[state] * weight\n", + "\n", + " state_weighted_cdds.append(weighted_cdd)\n", + " state_weighted_hdds.append(weighted_hdd)\n", + "\n", + " gasreg_cdds[gasreg] = pd.concat(state_weighted_cdds, axis=1).sum(axis=1)\n", + " gasreg_hdds[gasreg] = pd.concat(state_weighted_hdds, axis=1).sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ae1a27af-ba14-48fb-868a-20e0831bb444", + "metadata": {}, + "outputs": [], + "source": [ + "# Read cendiv-level degree days and combine with non-cendiv gasreg degree days\n", + "cendiv_number_name_map = {\n", + " '1': 'New England',\n", + " '2': 'Mid-Atlantic',\n", + " '3': 'East North Central',\n", + " '4': 'West North Central',\n", + " '5': 'South Atlantic',\n", + " '6': 'East South Central',\n", + " '7': 'West South Central',\n", + " '8': 'Mountain',\n", + " '9': 'Pacific'\n", + "}\n", + "annual_degree_days = pd.read_csv(\n", + " Path('inputs', 'kdegday.txt'),\n", + " sep='\\s+'\n", + ")\n", + "\n", + "annual_degree_days['DD_type'] = 'CDD'\n", + "annual_degree_days.iloc[::2, annual_degree_days.columns.get_loc('DD_type')] = 'HDD'\n", + "\n", + "annual_hdd = (\n", + " annual_degree_days.loc[annual_degree_days.DD_type == 'HDD']\n", + " .reset_index(drop=True)\n", + " .drop(columns='DD_type')\n", + " .set_index('Year')\n", + " .loc[2010:]\n", + ")\n", + "annual_hdd.columns = annual_hdd.columns.map(cendiv_number_name_map)\n", + "annual_hdd = annual_hdd.drop(columns=['Pacific', 'Mountain'])\n", + "for gasreg, hdd in gasreg_hdds.items():\n", + " annual_hdd[gasreg] = hdd.round().astype(int)\n", + "\n", + "annual_cdd = (\n", + " annual_degree_days.loc[annual_degree_days.DD_type == 'CDD']\n", + " .reset_index(drop=True)\n", + " .drop(columns='DD_type')\n", + " .set_index('Year')\n", + " .loc[2010:]\n", + ")\n", + "annual_cdd.columns = annual_cdd.columns.map(cendiv_number_name_map)\n", + "annual_cdd = annual_cdd.drop(columns=['Pacific', 'Mountain'])\n", + "for gasreg, cdd in gasreg_cdds.items():\n", + " annual_cdd[gasreg] = cdd.round().astype(int)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84da335c-9e4b-40bc-8fb0-f9667402a50f", + "metadata": {}, + "outputs": [], + "source": [ + "# Reformat and export\n", + "df_hdd = annual_hdd.copy()\n", + "df_hdd.columns = [col.replace('-', '_').replace(' ', '_') for col in df_hdd.columns]\n", + "df_hdd = df_hdd.reset_index().rename(columns={'Year': 't'}).assign(ddtype='HDD')\n", + "\n", + "df_cdd = annual_cdd.copy()\n", + "df_cdd.columns = [col.replace('-', '_').replace(' ', '_') for col in df_cdd.columns]\n", + "df_cdd = df_cdd.reset_index().rename(columns={'Year': 't'}).assign(ddtype='CDD')\n", + "\n", + "df_out = (\n", + " pd.concat([df_cdd, df_hdd])\n", + " .set_index(['t', 'ddtype'])\n", + " .sort_index()\n", + " .sort_index(axis=1)\n", + ")\n", + "\n", + "outpath = Path('outputs', 'gasreg_degree_days.csv')\n", + "outpath.parent.mkdir(parents=True, exist_ok=True)\n", + "df_out.to_csv(outpath)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/aeo_updates/temperature_gas_price_regression/calculate_regression_params.ipynb b/aeo_updates/temperature_gas_price_regression/calculate_regression_params.ipynb new file mode 100644 index 0000000..2e6e7f6 --- /dev/null +++ b/aeo_updates/temperature_gas_price_regression/calculate_regression_params.ipynb @@ -0,0 +1,232 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "d70f4c36-7194-4615-bf6a-672108e3cba6", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import statsmodels.api as sm\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a8f5f3b-63c4-4d27-901d-3837caa4e83a", + "metadata": {}, + "outputs": [], + "source": [ + "def get_gasreg_data(gasreg):\n", + " data = pd.concat([\n", + " gasreg_log_mult_diffs[[gasreg]].add_suffix('_price'),\n", + " gasreg_degree_days.filter(like=gasreg)\n", + " ], axis=1)\n", + " \n", + " return data\n", + "\n", + "def run_ols_regression(gasreg, data):\n", + " data['month'] = data.index.get_level_values('month')\n", + " month_dummies = (\n", + " pd.get_dummies(data['month'], prefix='month')\n", + " .drop(columns='month_5')\n", + " .astype(float)\n", + " )\n", + " \n", + " degree_day_columns = [f'{gasreg}_{dd_var}' for dd_var in ['cdd', 'hdd']]\n", + " temperature_variable = data[degree_day_columns].astype(float)\n", + " \n", + " X = pd.concat([temperature_variable, month_dummies], axis=1)\n", + " X = sm.add_constant(X)\n", + " y = data[f\"{gasreg}_price\"].values.astype(float)\n", + " \n", + " # The lines below represent an ordinary least-squares regression using a\n", + " # heteroskedasticity- and autocorrelation-consistent (HAC) estimator,\n", + " # which ensures that the standard errors calculated in the regression\n", + " # are robust to heteroskedastic and autocorrelated residuals (both\n", + " # common when working with time series data).\n", + " # The maxlags value represents the maximum number of timesteps\n", + " # (in this case days) across which the estimator adjusts for auto-\n", + " # correlation. The value is calculated using the Stock and Watson rule-of-thumb:\n", + " # number of lags = 0.75 * (number of observations)**(1/3)\n", + " # https://stock.scholars.harvard.edu/sites/g/files/omnuum5911/files/stock/files/aea_2015_lecture4_har_rev.pdf\n", + " maxlags = int(round(0.75 * len(data)**(1/3)))\n", + " model = sm.OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': maxlags})\n", + "\n", + " return model\n", + "\n", + "def apply_regression_model(gasreg, model, data):\n", + " data['month'] = data.index.get_level_values('month')\n", + " month_dummies = (\n", + " pd.get_dummies(data['month'], prefix='month')\n", + " .drop(columns='month_5')\n", + " .astype(float)\n", + " )\n", + " \n", + " degree_day_columns = [f'{gasreg}_{dd_var}' for dd_var in ['cdd', 'hdd']]\n", + " temperature_variable = data[degree_day_columns].astype(float)\n", + "\n", + " X_test = pd.concat([temperature_variable, month_dummies], axis=1)\n", + " X_test = sm.add_constant(X_test)\n", + " y_pred = model.predict(X_test)\n", + "\n", + " return y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "541c9d4f-8d35-4ed5-bfa7-16783de4d23d", + "metadata": {}, + "outputs": [], + "source": [ + "# Get daily HDD/CDDs and prices for each gasreg\n", + "gasreg_data = pd.read_csv(\n", + " Path('inputs', 'gasreg_regression_data.csv'),\n", + " index_col=['year', 'month', 'day']\n", + ")\n", + "gasreg_degree_days = (\n", + " gasreg_data[[col for col in gasreg_data.columns if 'dd' in col]]\n", + " .copy()\n", + ")\n", + "gasreg_prices = (\n", + " gasreg_data[[col for col in gasreg_data.columns if 'price' in col]]\n", + " .copy()\n", + ")\n", + "gasreg_prices.columns = [col.replace('_price', '') for col in gasreg_prices.columns]" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "a56f84f6-6fbb-44be-a2c7-ea6f890bea8a", + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate average annual prices for each gasreg\n", + "# and the daily deviations (log of the multiplicative difference)\n", + "# from the annual average price\n", + "gasreg_annual_average_prices = (\n", + " gasreg_prices.groupby(gasreg_prices.index.get_level_values('year'))\n", + " .transform('mean')\n", + ")\n", + "gasreg_log_mult_diffs = np.log(gasreg_prices) - np.log(gasreg_annual_average_prices)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "206b51f8-04ce-447b-909f-3545e9abcc6a", + "metadata": {}, + "outputs": [], + "source": [ + "# Fit the regression model using data from 2014-2023 (2024 was used for testing)\n", + "train_years = [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023]\n", + "test_years = [2024]\n", + "\n", + "gasreg_model_params = {}\n", + "for gasreg in gasreg_prices.columns.tolist():\n", + " data = get_gasreg_data(gasreg)\n", + " train_data = data.loc[data.index.get_level_values('year').isin(train_years)].copy()\n", + " model = run_ols_regression(gasreg, train_data)\n", + "\n", + " gasreg_model_params[gasreg] = (\n", + " model.params\n", + " .rename({\n", + " f\"{gasreg}_cdd\": 'cdd',\n", + " f\"{gasreg}_hdd\": 'hdd'\n", + " })\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d61d11ac-641d-4bae-b642-ff176bc97238", + "metadata": {}, + "outputs": [], + "source": [ + "# Reformat for ReEDS\n", + "regression_params = (\n", + " pd.concat(gasreg_model_params, axis=1)\n", + " .rename({\n", + " 'const': 'alpha',\n", + " 'cdd': 'beta_CDD',\n", + " 'hdd': 'beta_HDD',\n", + " 'month_1': 'alpha_JAN',\n", + " 'month_2': 'alpha_FEB',\n", + " 'month_3': 'alpha_MAR',\n", + " 'month_4': 'alpha_APR',\n", + " 'month_6': 'alpha_JUN',\n", + " 'month_7': 'alpha_JUL',\n", + " 'month_8': 'alpha_AUG',\n", + " 'month_9': 'alpha_SEP',\n", + " 'month_10': 'alpha_OCT',\n", + " 'month_11': 'alpha_NOV',\n", + " 'month_12': 'alpha_DEC'\n", + " })\n", + ")\n", + "regression_params.loc['alpha_MAY'] = 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0290ae67-a2ac-4126-b576-026e23c55039", + "metadata": {}, + "outputs": [], + "source": [ + "# Export\n", + "outpath = Path('outputs', 'gasreg_degree_day_price_mult_regression_params.csv')\n", + "outpath.parent.mkdir(parents=True, exist_ok=True)\n", + "(\n", + " regression_params.loc[[\n", + " 'beta_CDD',\n", + " 'beta_HDD',\n", + " 'alpha',\n", + " 'alpha_JAN',\n", + " 'alpha_FEB',\n", + " 'alpha_MAR',\n", + " 'alpha_APR',\n", + " 'alpha_MAY',\n", + " 'alpha_JUN',\n", + " 'alpha_JUL',\n", + " 'alpha_AUG',\n", + " 'alpha_SEP',\n", + " 'alpha_OCT',\n", + " 'alpha_NOV',\n", + " 'alpha_DEC'\n", + " ]]\n", + " .rename_axis('param')\n", + " .round(3)\n", + " .sort_index(axis=1)\n", + " .to_csv(outpath)\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/aeo_updates/temperature_gas_price_regression/inputs/NST-EST2025-POP.xlsx b/aeo_updates/temperature_gas_price_regression/inputs/NST-EST2025-POP.xlsx new file mode 100644 index 0000000..4088a5e Binary files /dev/null and b/aeo_updates/temperature_gas_price_regression/inputs/NST-EST2025-POP.xlsx differ diff --git a/aeo_updates/temperature_gas_price_regression/inputs/NationalProjections_ProjectedTotalPopulation_2030-2050.csv b/aeo_updates/temperature_gas_price_regression/inputs/NationalProjections_ProjectedTotalPopulation_2030-2050.csv new file mode 100644 index 0000000..5b457b3 --- /dev/null +++ b/aeo_updates/temperature_gas_price_regression/inputs/NationalProjections_ProjectedTotalPopulation_2030-2050.csv @@ -0,0 +1,53 @@ +FIPS,Geography Name,2020,2030,2040,2050 +0,United States," 331,449,281 ","349,695,865","362,293,471","371,766,982" +1,Alabama," 5,024,279 ","5,213,942","5,269,412","5,263,069" +2,Alaska," 733,391 ","751,139","745,731","731,687" +4,Arizona," 7,151,502 ","7,766,236","8,354,030","8,881,012" +5,Arkansas," 3,011,524 ","3,084,796","3,063,117","3,005,949" +6,California," 39,538,223 ","41,321,454","42,164,453","42,520,492" +8,Colorado," 5,773,714 ","6,387,211","7,050,081","7,690,559" +9,Connecticut," 3,605,944 ","3,629,971","3,521,129","3,375,527" +10,Delaware," 989,948 ","1,063,673","1,127,463","1,181,074" +11,District of Columbia," 689,545 ","761,820","839,353","913,940" +12,Florida," 21,538,187 ","23,790,046","26,202,483","28,521,380" +13,Georgia," 10,711,908 ","11,534,245","12,262,064","12,883,056" +15,Hawaii," 1,455,271 ","1,529,814","1,573,525","1,599,519" +16,Idaho," 1,839,106 ","2,066,265","2,330,712","2,598,197" +17,Illinois," 12,812,508 ","12,798,928","12,288,130","11,659,474" +18,Indiana," 6,785,528 ","7,018,843","7,062,290","7,022,739" +19,Iowa," 3,190,369 ","3,301,784","3,324,567","3,308,283" +20,Kansas," 2,937,880 ","3,002,711","2,972,719","2,908,542" +21,Kentucky," 4,505,836 ","4,633,881","4,626,153","4,564,320" +22,Louisiana," 4,657,757 ","4,752,752","4,694,905","4,583,416" +23,Maine," 1,362,359 ","1,388,293","1,368,934","1,334,028" +24,Maryland," 6,177,224 ","6,494,090","6,680,280","6,791,286" +25,Massachusetts," 7,029,917 ","7,409,395","7,648,478","7,802,759" +26,Michigan," 10,077,331 ","10,224,497","10,022,976","9,710,293" +27,Minnesota," 5,706,494 ","6,023,698","6,231,051","6,370,014" +28,Mississippi," 2,961,279 ","2,956,772","2,837,019","2,690,219" +29,Missouri," 6,154,913 ","6,281,703","6,206,924","6,061,168" +30,Montana," 1,084,225 ","1,159,875","1,222,020","1,272,408" +31,Nebraska," 1,961,504 ","2,067,878","2,135,297","2,179,078" +32,Nevada," 3,104,614 ","3,437,890","3,799,910","4,150,836" +33,New Hampshire," 1,377,529 ","1,424,739","1,433,347","1,425,109" +34,New Jersey," 9,288,994 ","9,675,872","9,828,845","9,867,242" +35,New Mexico," 2,117,522 ","2,162,106","2,137,651","2,088,708" +36,New York," 20,201,249 ","20,836,092","20,883,793","20,686,329" +37,North Carolina," 10,439,388 ","11,160,159","11,747,051","12,219,917" +38,North Dakota," 779,094 ","867,403","966,020","1,063,242" +39,Ohio," 11,799,448 ","11,999,653","11,800,007","11,467,712" +40,Oklahoma," 3,959,353 ","4,121,121","4,181,942","4,193,934" +41,Oregon," 4,237,256 ","4,563,425","4,852,675","5,099,791" +42,Pennsylvania," 13,002,700 ","13,231,491","13,022,171","12,665,983" +44,Rhode Island," 1,097,379 ","1,131,942","1,134,638","1,124,012" +45,South Carolina," 5,118,425 ","5,514,501","5,867,090","6,169,078" +46,South Dakota," 886,667 ","944,259","988,684","1,023,068" +47,Tennessee," 6,910,840 ","7,359,519","7,705,470","7,973,147" +48,Texas," 29,145,505 ","32,463,602","36,177,044","39,842,850" +49,Utah," 3,271,616 ","3,699,050","4,209,773","4,734,870" +50,Vermont," 643,077 ","656,319","648,499","633,264" +51,Virginia," 8,631,393 ","9,129,002","9,468,578","9,705,706" +53,Washington," 7,705,281 ","8,512,355","9,377,817","10,210,211" +54,West Virginia," 1,793,716 ","1,750,206","1,628,902","1,498,241" +55,Wisconsin," 5,893,718 ","6,052,525","6,030,730","5,938,601" +56,Wyoming," 576,851 ","586,925","577,539","561,644" diff --git a/aeo_updates/temperature_gas_price_regression/inputs/kdegday.txt b/aeo_updates/temperature_gas_price_regression/inputs/kdegday.txt new file mode 100644 index 0000000..2b1156e --- /dev/null +++ b/aeo_updates/temperature_gas_price_regression/inputs/kdegday.txt @@ -0,0 +1,123 @@ +Year 1 2 3 4 5 6 7 8 9 +1990 5988 5240 5780 6141 2281 2947 1966 5369 3541 +1990 428 566 602 912 2049 1560 2527 1223 775 +1991 6087 5410 6011 6433 2549 3226 2194 5424 3487 +1991 465 742 908 1055 2107 1697 2454 1122 671 +1992 6977 6232 6407 6386 2901 3518 2158 5218 3142 +1992 245 385 409 587 1701 1241 2162 1163 843 +1993 6847 6202 6756 7285 3014 3791 2501 5656 3464 +1993 454 663 688 766 2005 1569 2323 1089 638 +1994 6792 6195 6478 6622 2741 3423 2113 5125 3575 +1994 505 632 620 829 1925 1390 2380 1398 770 +1995 6686 6079 6741 6916 2961 3653 2148 5078 3210 +1995 472 705 878 928 2020 1611 2398 1225 725 +1996 6774 6251 6999 7447 3082 3796 2280 5111 3283 +1996 368 528 596 773 1822 1428 2483 1377 823 +1997 6768 6074 6674 6852 2822 3668 2419 5284 3232 +1997 355 492 547 824 1842 1352 2331 1328 864 +1998 5781 5042 5317 5833 2407 3021 1996 5189 3812 +1998 456 673 850 1083 2257 1878 3008 1240 747 +1999 6063 5578 5993 5992 2614 3151 1801 4871 3632 +1999 575 779 817 920 2015 1677 2623 1244 665 +2000 6624 5986 6317 6504 2902 3555 2152 4952 3464 +2000 279 460 630 983 1925 1672 2773 1493 771 +2001 6203 5531 5845 6224 2599 3331 2161 4983 3550 +2001 463 627 723 993 1898 1475 2541 1523 858 +2002 6235 5537 6128 6489 2661 3447 2291 5176 3516 +2002 508 773 899 1045 2185 1755 2513 1480 784 +2003 6976 6245 6536 6598 2881 3564 2205 4795 3359 +2003 476 618 619 906 1979 1449 2495 1564 977 +2004 6710 5878 6179 6333 2712 3293 2041 4988 3352 +2004 367 595 585 720 2038 1516 2481 1304 827 +2005 6646 5937 6223 6218 2772 3384 1985 4873 3382 +2005 599 895 944 1063 2100 1674 2645 1386 777 +2006 5885 5198 5704 5824 2473 3216 1800 4894 3563 +2006 485 697 732 1033 2053 1646 2787 1480 918 +2007 6538 5743 6075 6389 2522 3191 2106 4919 3510 +2007 445 697 881 1102 2220 1892 2478 1578 827 +2008 6434 5767 6679 7124 2707 3604 2126 5210 3572 +2008 462 670 683 818 1995 1535 2502 1400 918 +2009 6644 5910 6512 6845 2809 3541 2152 5117 3544 +2009 350 526 532 697 2030 1477 2590 1407 893 +2010 5935 5539 6188 6570 3163 3954 2450 5061 3628 +2010 634 913 963 1095 2271 1974 2754 1370 674 +2011 6113 5471 6173 6570 2564 3347 2113 5305 3823 +2011 553 840 858 1074 2260 1725 3112 1462 734 +2012 5563 4960 5356 5520 2305 2880 1648 4561 3418 +2012 563 819 974 1221 2163 1760 2913 1581 917 +2013 6425 5827 6623 7140 2736 3651 2325 5264 3367 +2013 540 685 689 892 2001 1438 2535 1470 889 +2014 6676 6190 7196 7309 2961 3935 2421 4739 2777 +2014 420 600 609 812 2000 1491 2474 1438 1068 +2015 6520 5762 6165 6093 2496 3224 2085 4597 2902 +2015 556 809 729 941 2397 1717 2742 1484 1068 +2016 5928 5338 5701 5792 2464 3095 1750 4620 3035 +2016 625 891 958 1072 2404 1956 2882 1501 929 +2017 6037 5318 5684 6004 2239 2837 1580 4574 3190 +2017 451 665 708 910 2247 1585 2718 1549 1056 +2018 6323 5769 6434 6975 2638 3479 2252 4810 3172 +2018 668 890 972 1134 2411 1928 2855 1573 1004 +2019 6538 5736 6427 7082 2392 3181 2143 5310 3547 +2019 536 787 832 951 2503 1886 2759 1397 845 +2020 5822 5198 5855 6326 2263 3064 1812 4784 3219 +2020 645 848 831 964 2335 1636 2735 1682 1071 +2021 5799 5261 5747 6061 2366 3166 1911 4693 3338 +2021 604 837 911 1093 2226 1611 2644 1583 1040 +2022 6019 5635 6344 6905 2523 3438 2200 5124 3366 +2022 647 838 816 1050 2302 1728 2992 1586 1088 +2023 5564 4954 5417 5929 2150 2826 1725 5117 3608 +2023 518 683 713 1042 2258 1669 3117 1478 832 +2024 5643 4981 5306 5716 2271 3024 1844 4753 3352 +2024 620 867 899 1046 2399 1859 3073 1710 1056 +2025 5924 5319 5952 6383 2344 3161 1949 4877 3290 +2025 609 845 851 1046 2400 1773 2901 1568 1023 +2026 5901 5298 5937 6376 2329 3148 1942 4871 3283 +2026 615 853 856 1051 2415 1781 2915 1575 1032 +2027 5879 5277 5923 6369 2313 3136 1935 4865 3277 +2027 621 860 861 1056 2430 1788 2928 1583 1040 +2028 5856 5256 5908 6362 2298 3123 1928 4858 3270 +2028 627 868 866 1061 2445 1796 2942 1591 1049 +2029 5833 5235 5894 6355 2283 3111 1921 4851 3263 +2029 633 876 871 1066 2460 1803 2955 1598 1057 +2030 5810 5214 5879 6348 2267 3098 1914 4844 3256 +2030 640 884 876 1071 2476 1811 2969 1606 1066 +2031 5787 5193 5864 6341 2251 3086 1907 4837 3249 +2031 646 892 881 1076 2491 1818 2982 1613 1075 +2032 5764 5172 5850 6334 2235 3073 1900 4830 3242 +2032 652 900 886 1081 2507 1826 2996 1621 1083 +2033 5741 5151 5835 6327 2220 3060 1893 4823 3234 +2033 659 908 890 1086 2522 1834 3010 1629 1092 +2034 5718 5130 5820 6319 2204 3048 1886 4816 3227 +2034 665 915 895 1092 2538 1841 3023 1636 1101 +2035 5695 5109 5806 6312 2188 3035 1879 4809 3219 +2035 671 923 900 1097 2553 1849 3037 1644 1109 +2036 5672 5088 5791 6304 2173 3022 1872 4802 3212 +2036 678 931 905 1102 2569 1857 3050 1651 1118 +2037 5649 5068 5776 6297 2157 3009 1865 4796 3204 +2037 684 939 910 1107 2585 1864 3064 1659 1127 +2038 5626 5047 5762 6289 2141 2996 1858 4789 3197 +2038 690 947 915 1112 2600 1872 3077 1666 1135 +2039 5603 5026 5747 6282 2125 2984 1852 4782 3189 +2039 697 955 920 1117 2616 1879 3091 1674 1144 +2040 5579 5005 5732 6274 2110 2971 1845 4776 3181 +2040 703 963 925 1122 2632 1887 3104 1681 1153 +2041 5556 4984 5717 6267 2094 2958 1838 4769 3174 +2041 710 971 930 1128 2648 1895 3118 1689 1161 +2042 5533 4963 5703 6259 2079 2945 1831 4763 3166 +2042 716 978 935 1133 2663 1902 3131 1696 1170 +2043 5510 4943 5688 6252 2063 2933 1825 4756 3158 +2043 722 986 940 1138 2679 1910 3145 1703 1179 +2044 5487 4922 5673 6244 2047 2920 1818 4750 3150 +2044 729 994 945 1143 2695 1918 3158 1711 1188 +2045 5464 4901 5658 6236 2032 2907 1811 4744 3143 +2045 735 1002 950 1148 2711 1925 3172 1718 1196 +2046 5440 4880 5644 6229 2016 2894 1805 4737 3135 +2046 741 1010 955 1153 2727 1933 3185 1725 1205 +2047 5417 4859 5629 6221 2001 2881 1798 4731 3127 +2047 748 1018 960 1159 2742 1940 3199 1733 1214 +2048 5394 4838 5614 6213 1985 2869 1791 4725 3119 +2048 754 1026 965 1164 2758 1948 3212 1740 1222 +2049 5371 4818 5599 6206 1970 2856 1785 4718 3112 +2049 760 1033 970 1169 2774 1956 3226 1747 1231 +2050 5348 4797 5585 6198 1955 2843 1778 4712 3104 +2050 767 1041 975 1174 2790 1963 3239 1755 1240 \ No newline at end of file diff --git a/aeo_updates/temperature_gas_price_regression/inputs/nst-est2020.xlsx b/aeo_updates/temperature_gas_price_regression/inputs/nst-est2020.xlsx new file mode 100644 index 0000000..ffd5bcb Binary files /dev/null and b/aeo_updates/temperature_gas_price_regression/inputs/nst-est2020.xlsx differ diff --git a/aeo_updates/temperature_gas_price_regression/inspect_hub_locations.ipynb b/aeo_updates/temperature_gas_price_regression/inspect_hub_locations.ipynb new file mode 100644 index 0000000..0640ff0 --- /dev/null +++ b/aeo_updates/temperature_gas_price_regression/inspect_hub_locations.ipynb @@ -0,0 +1,207 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2b4f813c-9374-4cd4-962b-3a70d4b267f1", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Especially on Linux, gdxpds should be imported before pandas to avoid a library conflict. Also make sure your GAMS directory is listed in LD_LIBRARY_PATH.\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import geopandas as gpd\n", + "import sys\n", + "\n", + "reeds_path = '' # User should specify path to ReEDS repository here\n", + "sys.path.append(reeds_path)\n", + "import reeds" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7e7c9b42-1b34-4b13-a497-5151dc98aa08", + "metadata": {}, + "outputs": [], + "source": [ + "state_fips_to_abbrev_map = {\n", + " '01': 'AL',\n", + " '02': 'AK',\n", + " '04': 'AZ',\n", + " '05':'AR',\n", + " '06':'CA',\n", + " '08':'CO',\n", + " '09':'CT',\n", + " '10':'DE',\n", + " '11':'DC',\n", + " '12':'FL',\n", + " '13':'GA',\n", + " '15':'HI',\n", + " '16':'ID',\n", + " '17':'IL',\n", + " '18':'IN',\n", + " '19':'IA',\n", + " '20':'KS',\n", + " '21':'KY',\n", + " '22':'LA',\n", + " '23':'ME',\n", + " '24':'MD',\n", + " '25':'MA',\n", + " '26':'MI',\n", + " '27':'MN',\n", + " '28':'MS',\n", + " '29':'MO',\n", + " '30':'MT',\n", + " '31':'NE',\n", + " '32':'NV',\n", + " '33':'NH',\n", + " '34':'NJ',\n", + " '35':'NM',\n", + " '36':'NY',\n", + " '37':'NC',\n", + " '38':'ND',\n", + " '39':'OH',\n", + " '40':'OK',\n", + " '41':'OR',\n", + " '42':'PA',\n", + " '44':'RI',\n", + " '45':'SC',\n", + " '46':'SD',\n", + " '47':'TN',\n", + " '48':'TX',\n", + " '49':'UT',\n", + " '50':'VT',\n", + " '51':'VA',\n", + " '53':'WA',\n", + " '54':'WV',\n", + " '55':'WI',\n", + " '56':'WY',\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f321f065-2cf0-43c6-9e7a-e32a9df8c70e", + "metadata": {}, + "outputs": [], + "source": [ + "gasreg_state_map = {\n", + " 'Northwest': ['OR', 'WA'],\n", + " 'California': ['CA'],\n", + " 'New England': ['ME', 'NH', 'VT', 'MA', 'CT', 'RI'],\n", + " 'Mid-Atlantic': ['NY', 'NJ', 'PA'],\n", + " 'South Atlantic': ['DC', 'DE', 'MD', 'NC', 'SC', 'GA', 'FL', 'WV', 'VA'],\n", + " 'East North Central': ['WI', 'IL', 'IN', 'MI', 'OH'],\n", + " 'Mountain': ['MT', 'ID', 'WY', 'CO', 'UT', 'NV'],\n", + " 'Southwest': ['AZ', 'NM'],\n", + " 'West South Central': ['TX', 'OK', 'AR', 'LA'],\n", + " 'East South Central': ['KY', 'TN', 'MS', 'AL'],\n", + " 'West North Central': ['ND', 'MN', 'SD', 'NE', 'IA', 'KS', 'MO']\n", + "}\n", + "\n", + "state_region_map = {}\n", + "for region, state_list in gasreg_state_map.items():\n", + " for state in state_list:\n", + " state_region_map[state] = region" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69c2873c-e224-4ba5-ba68-34030bb8862b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Hitachi Energy Barnett': 'West South Central',\n", + " 'Hitachi Energy California - North': 'California',\n", + " 'Hitachi Energy California - South': 'California',\n", + " 'Hitachi Energy Chicago Metro': 'East North Central',\n", + " 'Hitachi Energy Green River': 'Mountain',\n", + " 'Hitachi Energy Gulf Coast ELA': 'West South Central',\n", + " 'Hitachi Energy Gulf Coast ETX': 'West South Central',\n", + " 'Hitachi Energy Gulf Coast STX': 'West South Central',\n", + " 'Hitachi Energy Gulf Coast WLA': 'West South Central',\n", + " 'Hitachi Energy Haynesville': 'West South Central',\n", + " 'Hitachi Energy Henry Hub': 'West South Central',\n", + " 'Hitachi Energy Marcellus - Central': 'Mid-Atlantic',\n", + " 'Hitachi Energy Marcellus - Lower': 'South Atlantic',\n", + " 'Hitachi Energy Marcellus - Upper': 'Mid-Atlantic',\n", + " 'Hitachi Energy Michigan': 'East North Central',\n", + " 'Hitachi Energy Michigan/Ontario': 'East North Central',\n", + " 'Hitachi Energy Mid-Atlantic': 'South Atlantic',\n", + " 'Hitachi Energy Midcontinent - Central': 'West North Central',\n", + " 'Hitachi Energy Midcontinent - East': 'East North Central',\n", + " 'Hitachi Energy Midcontinent - Lower': 'West South Central',\n", + " 'Hitachi Energy Midcontinent - Upper': 'West North Central',\n", + " 'Hitachi Energy Mojave': 'Southwest',\n", + " 'Hitachi Energy New England': 'New England',\n", + " 'Hitachi Energy New York - Upstate': 'Mid-Atlantic',\n", + " 'Hitachi Energy New York/Long Island': 'Mid-Atlantic',\n", + " 'Hitachi Energy New York/Niagara': 'Mid-Atlantic',\n", + " 'Hitachi Energy Niobrara DJ': 'Mountain',\n", + " 'Hitachi Energy Northwest': 'Northwest',\n", + " 'Hitachi Energy Permian': 'West South Central',\n", + " 'Hitachi Energy Piceance/Uinta': 'Mountain',\n", + " 'Hitachi Energy San Juan': 'Southwest',\n", + " 'Hitachi Energy South Atlantic': 'South Atlantic',\n", + " 'Hitachi Energy TexOK': 'West South Central'}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "county_geo = reeds.spatial.get_map('county', source='tiger')\n", + "\n", + "hub_locations = pd.read_excel('//nrelnas01/ReEDS/FY26_NatGas_KO/HE_VelSuite_GasIndices.xlsx')\n", + "hub_locations = gpd.GeoDataFrame(\n", + " hub_locations,\n", + " geometry=gpd.points_from_xy(hub_locations.Longitude, hub_locations.Latitude),\n", + " crs='EPSG:4326'\n", + ")\n", + "hub_locations = hub_locations.to_crs(county_geo.crs)\n", + "\n", + "county_geo['STCODE'] = county_geo['STATEFP'].map(state_fips_to_abbrev_map)\n", + "county_geo['gasreg'] = county_geo.STCODE.map(state_region_map)\n", + "gasreg_geo = county_geo.dissolve('gasreg')\n", + "hub_locations = gpd.sjoin(hub_locations, gasreg_geo[['geometry']])\n", + "hub_gasreg_map = dict(zip(hub_locations['Price_Hub'], hub_locations['gasreg']))\n", + "\n", + "hub_gasreg_map" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/aeo_updates/temperature_gas_price_regression/write_gasreg_regression_data.ipynb b/aeo_updates/temperature_gas_price_regression/write_gasreg_regression_data.ipynb new file mode 100644 index 0000000..a1c6137 --- /dev/null +++ b/aeo_updates/temperature_gas_price_regression/write_gasreg_regression_data.ipynb @@ -0,0 +1,462 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "b87eb8ef-97dd-4fbe-a37c-702ee0ce269a", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import requests\n", + "import urllib3\n", + "import os\n", + "from pathlib import Path\n", + "from urllib3.exceptions import InsecureRequestWarning\n", + "import warnings\n", + "\n", + "urllib3.disable_warnings(InsecureRequestWarning)\n", + "warnings.simplefilter(action='ignore', category=pd.errors.ParserWarning)\n", + "\n", + "reeds_path = os.path.expanduser('~/Documents/Github/ReEDS/ReEDS/')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e02c7de3-e6a4-434c-b3ce-5e7c1022b83d", + "metadata": {}, + "outputs": [], + "source": [ + "state_abbrev_map = {\n", + " 'Alabama': 'AL',\n", + " 'Alaska': 'AK',\n", + " 'Arizona': 'AZ',\n", + " 'Arkansas': 'AR',\n", + " 'California': 'CA',\n", + " 'Colorado': 'CO',\n", + " 'Connecticut': 'CT',\n", + " 'Delaware': 'DE',\n", + " 'District of Columbia': 'DC',\n", + " 'Florida': 'FL',\n", + " 'Georgia': 'GA',\n", + " 'Hawaii': 'HI',\n", + " 'Idaho': 'ID',\n", + " 'Illinois': 'IL',\n", + " 'Indiana': 'IN',\n", + " 'Iowa': 'IA',\n", + " 'Kansas': 'KS',\n", + " 'Kentucky': 'KY',\n", + " 'Louisiana': 'LA',\n", + " 'Maine': 'ME',\n", + " 'Maryland': 'MD',\n", + " 'Massachusetts': 'MA',\n", + " 'Michigan': 'MI',\n", + " 'Minnesota': 'MN',\n", + " 'Mississippi': 'MS',\n", + " 'Missouri': 'MO',\n", + " 'Montana': 'MT',\n", + " 'Nebraska': 'NE',\n", + " 'Nevada': 'NV',\n", + " 'New Hampshire': 'NH',\n", + " 'New Jersey': 'NJ',\n", + " 'New Mexico': 'NM',\n", + " 'New York': 'NY',\n", + " 'North Carolina': 'NC', \n", + " 'North Dakota': 'ND',\n", + " 'Ohio': 'OH',\n", + " 'Oklahoma': 'OK',\n", + " 'Oregon': 'OR',\n", + " 'Pennsylvania': 'PA',\n", + " 'Rhode Island': 'RI',\n", + " 'South Carolina': 'SC',\n", + " 'South Dakota': 'SD',\n", + " 'Tennessee': 'TN',\n", + " 'Texas': 'TX',\n", + " 'Utah': 'UT',\n", + " 'Vermont': 'VT',\n", + " 'Virginia': 'VA',\n", + " 'Washington': 'WA',\n", + " 'West Virginia': 'WV',\n", + " 'Wisconsin': 'WI',\n", + " 'Wyoming': 'WY'\n", + "}\n", + "abbrev_state_map = {v:k for k,v in state_abbrev_map.items()}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e7afbf50-cb9e-4af1-b657-44d7e48a27fa", + "metadata": {}, + "outputs": [], + "source": [ + "# Download state- and cendiv-level daily HDD/CDDs \n", + "def download_daily_state_degree_day_data(outdir):\n", + " for dd_type in ['cdd', 'hdd']:\n", + " for year in range(2014, 2025):\n", + " if dd_type == 'cdd':\n", + " energy_type = 'Cooling'\n", + " else:\n", + " energy_type = 'Heating'\n", + " \n", + " url = f\"https://ftp.cpc.ncep.noaa.gov/htdocs/degree_days/weighted/daily_data/{year}/StatesCONUS.{energy_type}.txt\"\n", + " fpath = Path(outdir, f\"daily_state_{dd_type}_{year}.txt\")\n", + " \n", + " response = requests.get(url, verify=False)\n", + " if response.status_code == 200:\n", + " with open(fpath, \"w\", encoding=\"utf-8\") as f:\n", + " f.write(response.text)\n", + " else:\n", + " print(f\"Error: {response.status_code}\")\n", + "\n", + "def download_daily_cendiv_degree_day_data(outdir):\n", + " for dd_type in ['cdd', 'hdd']:\n", + " for year in range(2014, 2025):\n", + " if dd_type == 'cdd':\n", + " energy_type = 'Cooling'\n", + " else:\n", + " energy_type = 'Heating'\n", + " \n", + " url = f\"https://ftp.cpc.ncep.noaa.gov/htdocs/degree_days/weighted/daily_data/{year}/Population.{energy_type}.txt\"\n", + " fpath = Path(outdir, f\"daily_cendiv_{dd_type}_{year}.txt\")\n", + " \n", + " response = requests.get(url, verify=False)\n", + " if response.status_code == 200:\n", + " with open(fpath, \"w\", encoding=\"utf-8\") as f:\n", + " f.write(response.text)\n", + " else:\n", + " print(f\"Error: {response.status_code}\")\n", + "\n", + "\n", + "download_daily_state_degree_day_data('inputs')\n", + "download_daily_cendiv_degree_day_data('inputs')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8edfbf13-5656-4c53-9738-bdf0f2e0007c", + "metadata": {}, + "outputs": [], + "source": [ + "def read_historical_state_populations(fpath, year_range):\n", + " hist_population = pd.read_excel(fpath, skiprows=3)\n", + " hist_population = (\n", + " hist_population.rename(columns={'Unnamed: 0': 'state'})\n", + " .set_index('state')\n", + " [year_range]\n", + " .dropna(subset=year_range[0])\n", + " .iloc[5:]\n", + " )\n", + " hist_population.index = hist_population.index.str.replace('.', '')\n", + "\n", + " return hist_population\n", + "\n", + "\n", + "hist_populations_2010s = read_historical_state_populations(\n", + " Path('inputs', 'nst-est2020.xlsx'),\n", + " range(2010, 2020)\n", + ")\n", + "hist_populations_2020s = read_historical_state_populations(\n", + " Path('inputs', 'NST-EST2025-POP.xlsx'),\n", + " range(2020, 2026)\n", + ")\n", + "hist_populations = pd.concat(\n", + " [hist_populations_2010s, hist_populations_2020s],\n", + " axis=1\n", + ")\n", + "hist_populations.index = hist_populations.index.map(state_abbrev_map)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a8224995-7e14-4280-a8ef-132d53dd5c1e", + "metadata": {}, + "outputs": [], + "source": [ + "gasreg_state_map = {\n", + " 'Northwest': ['OR', 'WA'],\n", + " 'California': ['CA'],\n", + " 'Mountain': ['MT', 'ID', 'WY', 'CO', 'UT', 'NV'],\n", + " 'Southwest': ['AZ', 'NM']\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "31789b0b-4ad5-414a-aa4e-6405549b4c76", + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate daily HDD/CDDs for gasregs that are not census divisions\n", + "def calculate_daily_gasreg_degree_days(dd_type, hist_populations, gasreg_state_map):\n", + " gasreg_dd = []\n", + " for year in range(2014, 2025):\n", + " df = pd.read_csv(\n", + " Path('inputs', f'daily_state_{dd_type}_{year}.txt'),\n", + " sep='|',\n", + " skiprows=3\n", + " )\n", + " df = df.rename(columns={'Region': 'state'}).set_index('state').transpose()\n", + " \n", + " gasreg_dds = {}\n", + " for gasreg, states in gasreg_state_map.items():\n", + " state_weighted_dds = []\n", + " gasreg_population = hist_populations.loc[states].sum()\n", + " \n", + " for state in states:\n", + " weight = hist_populations.loc[state][year] / gasreg_population.loc[year]\n", + " weighted_dd = df[state] * weight\n", + " state_weighted_dds.append(weighted_dd)\n", + " \n", + " gasreg_dds[gasreg] = pd.concat(state_weighted_dds, axis=1).sum(axis=1)\n", + " \n", + " gasreg_dds = pd.concat(gasreg_dds, axis=1)\n", + " gasreg_dd.append(gasreg_dds)\n", + " \n", + " gasreg_dd = pd.concat(gasreg_dd)\n", + " gasreg_dd.index = pd.to_datetime(gasreg_dd.index)\n", + " gasreg_dd = gasreg_dd.set_index([\n", + " gasreg_dd.index.year,\n", + " gasreg_dd.index.month,\n", + " gasreg_dd.index.day\n", + " ])\n", + " gasreg_dd.index = gasreg_dd.index.rename(['year', 'month', 'day'])\n", + "\n", + " return gasreg_dd\n", + "\n", + "gasreg_cdd = calculate_daily_gasreg_degree_days('cdd', hist_populations, gasreg_state_map)\n", + "gasreg_hdd = calculate_daily_gasreg_degree_days('hdd', hist_populations, gasreg_state_map)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "62235a25-34a6-477e-9554-820b3529cc1f", + "metadata": {}, + "outputs": [], + "source": [ + "# Get daily HDD/CDDs for census divisions\n", + "cendiv_number_name_map = {\n", + " '1': 'New_England',\n", + " '2': 'Mid_Atlantic',\n", + " '3': 'East_North_Central',\n", + " '4': 'West_North_Central',\n", + " '5': 'South_Atlantic',\n", + " '6': 'East_South_Central',\n", + " '7': 'West_South_Central',\n", + " '8': 'Mountain',\n", + " '9': 'Pacific',\n", + " 'CONUS': 'CONUS'\n", + "}\n", + "\n", + "def get_census_division_degree_days(year, dd_type):\n", + " dd = pd.read_csv(\n", + " Path('inputs', f\"daily_cendiv_{dd_type}_{year}.txt\"),\n", + " sep='|',\n", + " skiprows=3\n", + " )\n", + " dd['Region'] = dd['Region'].map(cendiv_number_name_map)\n", + " dd = (\n", + " dd.dropna(subset='Region')\n", + " .set_index('Region')\n", + " .transpose()\n", + " )\n", + " dd.index = pd.to_datetime(dd.index)\n", + "\n", + " return dd\n", + "\n", + "hdd_list = []\n", + "cdd_list = []\n", + "for year in range(2014, 2025):\n", + " hdd_list.append(get_census_division_degree_days(year, 'hdd'))\n", + " cdd_list.append(get_census_division_degree_days(year, 'cdd'))\n", + "\n", + "cendiv_hdd = pd.concat(hdd_list)\n", + "cendiv_hdd = cendiv_hdd.set_index([\n", + " cendiv_hdd.index.year,\n", + " cendiv_hdd.index.month,\n", + " cendiv_hdd.index.day\n", + "])\n", + "cendiv_hdd.index = cendiv_hdd.index.rename(['year', 'month', 'day'])\n", + "\n", + "cendiv_cdd = pd.concat(cdd_list)\n", + "cendiv_cdd = cendiv_cdd.set_index([\n", + " cendiv_cdd.index.year,\n", + " cendiv_cdd.index.month,\n", + " cendiv_cdd.index.day\n", + "])\n", + "cendiv_cdd.index = cendiv_cdd.index.rename(['year', 'month', 'day'])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "128ee3e6-f141-4ab3-afa0-11aac77a96f7", + "metadata": {}, + "outputs": [], + "source": [ + "# Combine to get daily HDD/CDDs for all gasregs\n", + "gasreg_hdd = pd.concat([\n", + " cendiv_hdd.drop(columns=['Mountain', 'Pacific', 'CONUS']),\n", + " gasreg_hdd\n", + "], axis=1)\n", + "\n", + "gasreg_cdd = pd.concat([\n", + " cendiv_cdd.drop(columns=['Mountain', 'Pacific', 'CONUS']),\n", + " gasreg_cdd\n", + "], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "0c3d7ba7-d62c-4417-8efa-e5ab7be55232", + "metadata": {}, + "outputs": [], + "source": [ + "# Get natural gas hub prices\n", + "hub_prices = pd.read_excel(\n", + " '//nrelnas01/ReEDS/FY26_NatGas_KO/Natural Gas Daily Hub Prices - 07-17-2025 - Internal NREL only.xlsx'\n", + ")\n", + "hub_prices = hub_prices.loc[(\n", + " hub_prices['Delivery Date'].dt.year.isin(range(2014, 2025))\n", + ")].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "c145037a-43fd-4294-9109-ae772bf4ca28", + "metadata": {}, + "outputs": [], + "source": [ + "# Assign hubs to gasregs. These are largely based on finding the geographic\n", + "# overlap between hub locations and gasregs in inspect_hub_locations.ipynb.\n", + "# In some cases, (e.g., East North Central), only a subset of the identified\n", + "# hub locations is chosen because it results in a better correlation (i.e.,\n", + "# higher HDD/CDD coefficients in the regression)\n", + "region_hub_map = {\n", + " 'California': ['California - North', 'California - South'],\n", + " 'East_North_Central': ['Chicago Metro'],\n", + " 'East_South_Central': ['Marcellus - Lower'],\n", + " 'Mid_Atlantic': ['Mid-Atlantic'],\n", + " 'Mountain': ['Niobrara DJ', 'Green River', 'Piceance/Uinta'],\n", + " 'New_England': ['New England'],\n", + " 'Northwest': ['Northwest'],\n", + " 'South_Atlantic': ['South Atlantic', 'Marcellus - Lower', 'Mid-Atlantic'],\n", + " 'Southwest': ['Mojave', 'San Juan'],\n", + " 'West_North_Central': ['Midcontinent - Upper', 'Midcontinent - Central'],\n", + " 'West_South_Central': [\n", + " 'Barnett',\n", + " 'Gulf Coast ELA',\n", + " 'Gulf Coast ETX',\n", + " 'Gulf Coast STX',\n", + " 'Gulf Coast WLA',\n", + " 'Haynesville',\n", + " 'Henry Hub',\n", + " 'Midcontinent - Lower',\n", + " 'TexOK',\n", + " ]\n", + "}\n", + "\n", + "# For each gasreg, calculate the volume-weighted average price of all hubs\n", + "def get_regional_volume_weighted_price(hub_prices, region, hub_names):\n", + " hub_names = [f\"Hitachi Energy {hub_name}\" for hub_name in hub_names]\n", + " select_hub_prices = (\n", + " hub_prices\n", + " .loc[hub_prices['Price Hub'].isin(hub_names)]\n", + " .copy()\n", + " .set_index('Delivery Date')\n", + " )\n", + "\n", + " if len(hub_names) > 1:\n", + " select_hub_prices['Total $'] = (\n", + " select_hub_prices['Wtd Avg Index $'] * select_hub_prices['Daily Volume']\n", + " )\n", + " select_hub_prices = (\n", + " select_hub_prices.groupby(level=0)\n", + " .sum(numeric_only=True)\n", + " )\n", + " select_hub_prices['volume_weighted_price'] = (\n", + " select_hub_prices['Total $'] / select_hub_prices['Daily Volume']\n", + " )\n", + " regional_volume_weighted_price = select_hub_prices['volume_weighted_price']\n", + " else:\n", + " regional_volume_weighted_price = select_hub_prices['Wtd Avg Index $']\n", + "\n", + " return regional_volume_weighted_price\n", + "\n", + "regional_prices = {}\n", + "for region, hub_names in region_hub_map.items():\n", + " regional_prices[region] = get_regional_volume_weighted_price(\n", + " hub_prices,\n", + " region,\n", + " hub_names\n", + " )\n", + "\n", + "regional_prices = pd.concat(regional_prices, axis=1)\n", + "regional_prices = regional_prices.set_index([\n", + " regional_prices.index.year,\n", + " regional_prices.index.month,\n", + " regional_prices.index.day\n", + "])\n", + "regional_prices = regional_prices.rename_axis(index=['year', 'month', 'day'])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "6fac292e-e9e4-46a6-ac0c-2ee01144cec3", + "metadata": {}, + "outputs": [], + "source": [ + "# Combine daily HDD/CDD and gas price data\n", + "cdd_data = gasreg_cdd.loc[regional_prices.index].copy()\n", + "hdd_data = gasreg_hdd.loc[regional_prices.index].copy()\n", + "\n", + "cdd_data.columns = [f\"{col}_cdd\" for col in cdd_data.columns]\n", + "hdd_data.columns = [f\"{col}_hdd\" for col in hdd_data.columns]\n", + "\n", + "data = pd.concat([cdd_data, hdd_data], axis=1).fillna(0)\n", + "for region in region_hub_map.keys():\n", + " data[f\"{region}_price\"] = regional_prices[region]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "c35aa6e3-94ac-4340-a15b-0adf2e6f4279", + "metadata": {}, + "outputs": [], + "source": [ + "# Export\n", + "data.to_csv(Path('inputs', 'gasreg_regression_data.csv'))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}