Add variable mapping of psm3 by AdamRJensen · Pull Request #1374 · pvlib/pvlib-python · GitHub
[go: up one dir, main page]

Skip to content

Add variable mapping of psm3 #1374

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Mar 14, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10000
Next Next commit
Add variable mapping of psm3
  • Loading branch information
AdamRJensen committed Jan 8, 2022
commit 5474a03c73da969a578bac90b6d15d20911b2079
58 changes: 53 additions & 5 deletions pvlib/iotools/psm3.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import requests
import pandas as pd
from json import JSONDecodeError
import warnings
from pvlib._deprecation import pvlibDeprecationWarning

NSRDB_API_BASE = "https://developer.nrel.gov"
PSM_URL = NSRDB_API_BASE + "/api/nsrdb/v2/solar/psm3-download.csv"
Expand All @@ -20,10 +22,30 @@
'surface_pressure', 'wind_direction', 'wind_speed')
PVLIB_PYTHON = 'pvlib python'

# Lookup table translating PSM3 field names (keys) into the equivalent
# pvlib variable names (values). Names absent from this table have no
# pvlib equivalent defined here.
# NOTE(review): the dew point key is spelled 'Dew point' (lower-case "p").
# If PSM3 files label that column 'Dew Point', this entry will never match
# during a column rename -- confirm against a real PSM3 file.
PSM3_VARIABLE_MAP = {
    # irradiance components
    'GHI': 'ghi',
    'DHI': 'dhi',
    'DNI': 'dni',
    # clear-sky irradiance components
    'Clearsky GHI': 'ghi_clear',
    'Clearsky DHI': 'dhi_clear',
    'Clearsky DNI': 'dni_clear',
    # solar position
    'Solar Zenith Angle': 'solar_zenith',
    # meteorological quantities
    'Temperature': 'temp_air',
    'Relative Humidity': 'relative_humidity',
    'Dew point': 'temp_dew',
    'Pressure': 'pressure',
    'Wind Direction': 'wind_direction',
    'Wind Speed': 'wind_speed',
    # location
    'Latitude': 'latitude',
    'Longitude': 'longitude',
    'Elevation': 'elevation',
}


def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
attributes=ATTRIBUTES, leap_day=False, full_name=PVLIB_PYTHON,
affiliation=PVLIB_PYTHON, timeout=30):
affiliation=PVLIB_PYTHON, map_variables=None, timeout=30):
"""
Retrieve NSRDB PSM3 timeseries weather data from the PSM3 API. The NSRDB
is described in [1]_ and the PSM3 API is described in [2]_, [3]_, and [4]_.
Expand Down Expand Up @@ -61,6 +83,9 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
optional
affiliation : str, default 'pvlib python'
optional
map_variables: bool
When true, renames columns of the DataFrame to pvlib variable names
where applicable. See variable PSM3_VARIABLE_MAP.
timeout : int, default 30
time in seconds to wait for server response before timeout

Expand Down Expand Up @@ -133,6 +158,13 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
# convert to string to accommodate integer years being passed in
names = str(names)

# convert pvlib names in attributes to psm3 convention (reverse mapping)
# unlike psm3 columns, attributes are lower case and with underscores
amap = {value : key.lower().replace(' ','_') for (key, value) in
PSM3_VARIABLE_MAP.items()}
attributes = [a if a not in amap.keys() else amap[a] for a in attributes]
attributes = list(set(attributes)) # remove duplicate values
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is interesting. Do we do "reverse mapping" in any other iotools functions? It is unfortunate that the PSM3 API's input parameter names are different from the output column names.

The second line might be clearer written as follows (up to you): `attributes = [amap.get(a, a) for a in attributes]`

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we keep this, it should probably be mentioned in the docstring

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cannot immediately think of any other function where reverse mapping would make sense. Pretty nifty though! And I think it is in line with the spirit of the iotools, in that it conforms the data access interface with pvlib conventions.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AdamRJensen should we add a sentence to the attributes docstring description for this? Something like (feel free to edit):

Alternatively, pvlib names may also be used (e.g. 'ghi' rather than 'GHI'); see :const:`PSM3_VARIABLE_MAP`.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we're referencing the iotools variable maps via :const: then we should think about giving them their own entries in the docs. Something like what we did for pvlib.temperature.TEMPERATURE_MODEL_PARAMETERS maybe? Let's do that in a separate issue though.


# required query-string parameters for request to PSM3 API
params = {
'api_key': api_key,
Expand Down Expand Up @@ -167,10 +199,10 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
# the CSV is in the response content as a UTF-8 bytestring
# to use pandas we need to create a file buffer from the response
fbuf = io.StringIO(response.content.decode('utf-8'))
return parse_psm3(fbuf)
return parse_psm3(fbuf, map_variables)


def parse_psm3(fbuf):
def parse_psm3(fbuf, map_variables):
"""
Parse an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB
is described in [1]_ and the SAM CSV format is described in [2]_.
Expand All @@ -184,6 +216,9 @@ def parse_psm3(fbuf):
----------
fbuf: file-like object
File-like object containing data to read.
map_variables: bool
When true, renames columns of the DataFrame to pvlib variable names
where applicable. See variable PSM3_VARIABLE_MAP.

Returns
-------
Expand Down Expand Up @@ -296,10 +331,20 @@ def parse_psm3(fbuf):
tz = 'Etc/GMT%+d' % -metadata['Time Zone']
data.index = pd.DatetimeIndex(dtidx).tz_localize(tz)

if map_variables is None:
warnings.warn(
'PSM3 variable names will be renamed to pvlib conventions by '
'default starting in pvlib 0.11.0. Specify map_variables=True '
'to enable that behavior now, or specify map_variables=False '
'to hide this warning.', pvlibDeprecationWarning)
map_variables = False
if map_variables:
data = data.rename(columns=PSM3_VARIABLE_MAP)

return data, metadata


def read_psm3(filename):
def read_psm3(filename, map_variables=None):
"""
Read an NSRDB PSM3 weather file (formatted as SAM CSV). The NSRDB
is described in [1]_ and the SAM CSV format is described in [2]_.
Expand All @@ -313,6 +358,9 @@ def read_psm3(filename):
----------
filename: str
Filename of a file containing data to read.
map_variables: bool
When true, renames columns of the DataFrame to pvlib variable names
where applicable. See variable PSM3_VARIABLE_MAP.

Returns
-------
Expand All @@ -334,5 +382,5 @@ def read_psm3(filename):
<https://rredc.nrel.gov/solar/old_data/nsrdb/2005-2012/wfcsv.pdf>`_
"""
with open(str(filename), 'r') as fbuf:
content = parse_psm3(fbuf)
content = parse_psm3(fbuf, map_variables)
return content
43 changes: 35 additions & 8 deletions pvlib/tests/iotools/test_psm3.py
9E7A
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import os
from pvlib.iotools import psm3
from ..conftest import DATA_DIR, RERUNS, RERUNS_DELAY
from ..conftest import DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal
import numpy as np
import pandas as pd
import pytest
Expand Down Expand Up @@ -76,7 +76,8 @@ def assert_psm3_equal(data, metadata, expected):
def test_get_psm3_tmy(nrel_api_key):
"""test get_psm3 with a TMY"""
data, metadata = psm3.get_psm3(LATITUDE, LONGITUDE, nrel_api_key,
PVLIB_EMAIL, names='tmy-2017')
PVLIB_EMAIL, names='tmy-2017',
map_variables=False)
expected = pd.read_csv(TMY_TEST_DATA)
assert_psm3_equal(data, metadata, expected)

Expand All @@ -86,7 +87,8 @@ def test_get_psm3_tmy(nrel_api_key):
def test_get_psm3_singleyear(nrel_api_key):
"""test get_psm3 with a single year"""
data, metadata = psm3.get_psm3(LATITUDE, LONGITUDE, nrel_api_key,
PVLIB_EMAIL, names='2017', interval=30)
PVLIB_EMAIL, names='2017',
map_variables=False, interval=30)
expected = pd.read_csv(YEAR_TEST_DATA)
assert_psm3_equal(data, metadata, expected)

Expand All @@ -96,7 +98,8 @@ def test_get_psm3_singleyear(nrel_api_key):
def test_get_psm3_5min(nrel_api_key):
"""test get_psm3 for 5-minute data"""
data, metadata = psm3.get_psm3(LATITUDE, LONGITUDE, nrel_api_key,
PVLIB_EMAIL, names='2019', interval=5)
PVLIB_EMAIL, names='2019', interval=5,
map_variables=False)
assert len(data) == 525600/5
first_day = data.loc['2019-01-01']
expected = pd.read_csv(YEAR_TEST_DATA_5MIN)
Expand All @@ -108,7 +111,7 @@ def test_get_psm3_5min(nrel_api_key):
def test_get_psm3_check_leap_day(nrel_api_key):
data_2012, _ = psm3.get_psm3(LATITUDE, LONGITUDE, nrel_api_key,
PVLIB_EMAIL, names="2012", interval=60,
leap_day=True)
leap_day=True, map_variables=False)
assert len(data_2012) == (8760 + 24)


Expand All @@ -133,7 +136,7 @@ def test_get_psm3_tmy_errors(
"""
with pytest.raises(HTTPError) as excinfo:
psm3.get_psm3(latitude, longitude, api_key, PVLIB_EMAIL,
names=names, interval=interval)
names=names, interval=interval, map_variables=False)
# ensure the HTTPError caught isn't due to overuse of the API key
assert "OVER_RATE_LIMIT" not in str(excinfo.value)

Expand All @@ -149,13 +152,37 @@ def io_input(request):

def test_parse_psm3(io_input):
"""test parse_psm3"""
data, metadata = psm3.parse_psm3(io_input)
data, metadata = psm3.parse_psm3(io_input, map_variables=False)
expected = pd.read_csv(YEAR_TEST_DATA)
assert_psm3_equal(data, metadata, expected)


def test_read_psm3():
"""test read_psm3"""
data, metadata = psm3.read_psm3(MANUAL_TEST_DATA)
data, metadata = psm3.read_psm3(MANUAL_TEST_DATA, map_variables=False)
expected = pd.read_csv(YEAR_TEST_DATA)
assert_psm3_equal(data, metadata, expected)


def test_read_psm3_map_variables():
    """Test read_psm3 with map_variables=True.

    Reads the manual test file once with variable mapping enabled and
    checks that the resulting column index equals the expected list of
    column names.
    """
    # Expected column labels after mapping. Entries such as 'Year',
    # 'Cloud Type' and 'Dew Point' are expected to come back unchanged.
    columns_mapped = ['Year', 'Month', 'Day', 'Hour', 'Minute', 'dhi', 'dni',
                      'ghi', 'dhi_clear', 'dni_clear', 'ghi_clear',
                      'Cloud Type', 'Dew Point', 'solar_zenith', 'Fill Flag',
                      'Surface Albedo', 'wind_speed', 'Precipitable Water',
                      'wind_direction', 'relative_humidity', 'temp_air',
                      'pressure']
    # The file only needs to be read once; the metadata is not checked here.
    data, _ = psm3.read_psm3(MANUAL_TEST_DATA, map_variables=True)
    assert_index_equal(data.columns, pd.Index(columns_mapped))


@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_psm3_attribute_mapping(nrel_api_key):
    """Check that a pvlib-style attribute name is accepted by get_psm3.

    Requests only the 'ghi' attribute with map_variables=True and verifies
    that a 'ghi' column is present in the returned data.
    """
    request_kwargs = {
        'names': 2019,
        'interval': 60,
        'attributes': ['ghi'],
        'map_variables': True,
    }
    weather, _metadata = psm3.get_psm3(
        LATITUDE, LONGITUDE, nrel_api_key, PVLIB_EMAIL, **request_kwargs)
    assert 'ghi' in weather.columns
0