Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 53 additions & 80 deletions psh/calculate_existing_psh_capacities.py
Original file line number Diff line number Diff line change
@@ -1,113 +1,84 @@
# -*- coding: utf-8 -*-
"""
Calculating Existing Pumped Storage Hydropower Capacities for ReEDS

This script takes in raw data received from Oak Ridge National Laboratory (ORNL) and
calculates the county-level operational and pump capacities [MW] and max energy [MWh].

Most of the ORNL data is sourced from the Global Energy Storage Database (GESDB),
containing Rated Power, Energy, Duration, and lat/lon data. Counties are mapped to the
ORNL data using a county-level CONUS shapefile before aggregating the data to county-level.

Procedures are adapted from the original version of this script developed by @kodiobika

@author: jvcarag
@date: 20260618 12:21
"""

import pandas as pd
import os
import geopandas as gpd
import sys
from shapely.geometry import Point

# Lookup table from PSH station name to its coordinates, stored as a single
# 'latitude,longitude' string per station.
# Coordinates were recorded manually, using
# https://hydroreform.org/on-your-river/ as a reference.
station_name_location_map = {
    'Edward C Hyatt': '39.5388,-121.4855',
    'Hiwassee': '35.152,-84.178',
    'San Luis (W R Gianelli)': '37.069173,-121.077265',
    'Rocky River': '41.5725,-73.4445',
    'Wallace Dam': '33.3502,-83.1574',
    'Olivenhain-Hodges Storage Project': '33.058037,-117.118823',
    'Horse Mesa': '33.590552,-111.343907',
    'Helms': '37.0385,-118.9661',
    'Jocassee': '34.9606,-82.9183',
    'Mormon Flat': '33.55356,-111.442849',
    'John W Keys III (Grand Coulee)': '47.957511,-118.977323',
    'Bad Creek': '35.0075,-83.0181',
    'Lewiston Niagara': '43.1422,-79.0402',
    'Raccoon Mountain': '35.045,-85.40833',
    'Mount Elbert': '39.094197,-106.352331',
    'Castaic': '34.6443,-118.7643',
    'Flatiron': '40.418428,-105.374757',
    'ONeill': '37.098671,-121.047718',
    'Richard B Russell': '34.026023,-82.593974',
    'Thermalito Hydro Power Plant': '39.5252,-121.6012',
    'Muddy Run': '39.8169,-76.3011',
    'Blenheim Gilboa': '42.4451,-74.4384',
    'Salina': '36.2661,-95.1056',
    'Northfield Mountain': '42.6102,-72.4388',
    'Ludington': '43.895,-86.4283',
    'Seneca': '41.8388,-79.0059',
    'Big Creek (J S Eastwood)': '37.1596,-119.2449',
    'Bath County': '38.2266,-79.8237',
    'Fairfield': '34.3122,-81.3281',
    'Waddell': '33.8464,-112.2663',
    'Yards Creek': '41.0015,-75.0445',
    'Bear Swamp (Jack Cockwell)': '42.6838,-72.9603',
    'Taum Sauk': '37.5333,-90.8167',
    'Smith Mountain': '37.041,-79.5359',
    'Rocky Mountain': '34.355642,-85.304002',
    'Clarence Cannon': '39.524417,-91.643946',
    'Cabin Creek': '39.65,-105.7189',
    'Carters': '34.616677,-84.673061',
    'Degray': '34.22,-93.11',
}
# Put the local ReEDS checkout on the module search path so that the
# `import reeds` below can resolve; the import must stay after the append.
# NOTE(review): assumes the ReEDS repository is cloned at ~/github/ReEDS -- confirm.
reeds_path = os.path.expanduser('~/github/ReEDS')
sys.path.append(reeds_path)
import reeds

def main():
reeds_path = os.path.expanduser('~/github/ReEDS')
outdir_path = os.path.join(reeds_path, 'inputs', 'storage')
dfcounty = gpd.read_file(
os.path.join(
reeds_path,
'inputs',
'shapefiles',
'US_COUNTY_2022'
)
)

# Read operational data for existing PSH fleet
psh_data = pd.read_excel(
'data/IHA US PSH for NREL.xlsx',
sheet_name='Operational'
)
# Add location information
psh_data['location'] = (
psh_data['Station Name\xa0'].map(station_name_location_map)
)
psh_data[['latitude', 'longitude']] = (
psh_data['location'].str.split(',', expand=True)
# Ingest county-level CONUS shapefile
dfcounty = reeds.spatial.get_map('county').reset_index()
# Ingest operational data for existing PSH fleet
psh_data_in = pd.read_excel(
os.path.join('data','GESDB_Projects_complete RS_v3_fromORNL.xlsx'),
sheet_name='Summary'
)

# Convert site-level PSH data to geopandas dataframe
psh_data = (
gpd.GeoDataFrame(
psh_data,
psh_data_in,
geometry=[
Point(xy)
for xy
in zip(psh_data['longitude'], psh_data['latitude'])
in zip(psh_data_in['Longitude'], psh_data_in['Latitude'])
]
)
.set_crs(epsg=4326)
)
# Spatially join with counties to determine each plant's county
psh_data = (
psh_data.to_crs(dfcounty.crs)
.sjoin(dfcounty[['rb', 'geometry']])
.rename(columns={'rb': 'r'})
.sjoin(dfcounty[['GEOID', 'geometry']])
.rename(columns={'GEOID': 'r'})
.drop(
columns=[
'location',
'latitude',
'longitude',
'Latitude',
'Longitude',
'geometry',
'index_right'
]
)
)
# Add tech and tech vintage columns (needed later in ReEDS)
# and get operational capacities, pump capacities,
# and max energies, and calculate totals for each county
# Add 'p' prefix to all FIPS in 'r' column
psh_data['r'] = 'p' + psh_data['r']

# Add tech and tech vintage columns (needed later in ReEDS)
psh_data.insert(0, '*i', 'pumped-hydro')
psh_data.insert(0, 'v', 'init-1')
psh_data = (
# Convert Rated Power to MW
psh_data['Rated Power (MW)'] = psh_data['Rated Power(kW)'] * 1e-3
# Create a duplicate column as a placeholder for pump capacity
psh_data['Pump Capacity (MW)'] = psh_data['Rated Power (MW)'].copy()

# Calculate county-level operational capacity, pump capacity, and max energy
psh_data_out = (
psh_data.rename(columns={
'Station Name\xa0': 'station',
'Operational Capacity (MW)\xa0': 'operational_capacity_MW',
'Pump Capacity': 'pump_capacity_MW',
'Gen Cap * Duration (MWh)': 'max_energy_MWh'
'Title': 'station',
'Rated Power (MW)': 'operational_capacity_MW',
'Pump Capacity (MW)': 'pump_capacity_MW',
'Energy (MWh)': 'max_energy_MWh'
})
[[
'*i',
Expand All @@ -122,9 +93,11 @@ def main():
.sum(numeric_only=True)
.round(1)
)
psh_data.to_csv(os.path.join(outdir_path, 'cap_existing_psh.csv'))
# Output data to ReEDS inputs folder
psh_data_out.to_csv(os.path.join(outdir_path, 'cap_existing_psh.csv'))

print(f"Run complete. See {outdir_path} for outputs.")


if __name__ == "__main__":
main()
Binary file not shown.
Binary file removed psh/data/IHA US PSH for NREL.xlsx
Binary file not shown.