From dc73b3f2bcdf07f36d0bb8087eaf49fe21529602 Mon Sep 17 00:00:00 2001 From: Kevin Anderson Date: Thu, 8 Feb 2024 19:16:37 -0500 Subject: [PATCH 01/14] create get_solrad function --- pvlib/iotools/solrad.py | 74 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/pvlib/iotools/solrad.py b/pvlib/iotools/solrad.py index e5bb05d709..f59d783549 100644 --- a/pvlib/iotools/solrad.py +++ b/pvlib/iotools/solrad.py @@ -3,6 +3,8 @@ import numpy as np import pandas as pd +import urllib +import warnings # pvlib conventions BASE_HEADERS = ( @@ -121,3 +123,75 @@ def read_solrad(filename): pass return data + + +def get_solrad(station, start, end, + url="https://gml.noaa.gov/aftp/data/radiation/solrad/"): + """Request data from NOAA SOLRAD and read it into a Dataframe. + + A list of stations and their descriptions can be found in [1]_, + The data files are described in [2]_. + + Data is returned for complete days, including ``start`` and ``end``. + + Parameters + ---------- + station : str + Three letter station abbreviation. + start : datetime-like + First day of the requested period + end : datetime-like + Last day of the requested period + url : str, default: 'https://gml.noaa.gov/aftp/data/radiation/solrad/' + API endpoint URL + + Returns + ------- + data : pd.DataFrame + Dataframe with data from SOLRAD. + meta : dict + Metadata. + + Notes + ----- + Recent SOLRAD data is 1-minute averages. Prior to 2015-01-01, it was + 3-minute averages. + + Examples + -------- + >>> # Retrieve two months irradiance data from the ABQ SOLRAD station + >>> data, metadata = pvlib.iotools.get_solrad( + >>> start="2020-01-01", end="2020-01-31", station='abq') + + References + ---------- + .. [1] https://gml.noaa.gov/grad/solrad/index.html + .. [2] https://gml.noaa.gov/aftp/data/radiation/solrad/README_SOLRAD.txt + """ + # Use pd.to_datetime so that strings (e.g. 
'2021-01-01') are accepted + start = pd.to_datetime(start) + end = pd.to_datetime(end) + + # Generate list of filenames + dates = pd.date_range(start, end, freq='d', inclusive='both') + years = dates.year + daysofyear = dates.dayofyear + filenames = [ + f"{station}/{year}/{station}{year-2000}{doy:03}.dat" + for year, doy in zip(years, daysofyear) + ] + + dfs = [] # Initialize list of monthly dataframes + for f in filenames: + try: + dfi = read_solrad(url + f) + dfs.append(dfi) + except urllib.error.HTTPError: + warnings.warn(f"The following file was not found: {f}") + + data = pd.concat(dfs, axis='rows') + + meta = {'station': station, + 'filenames': filenames} + + return data, meta From c7624cd51f8ee51a085f4f9ab3005e4a08523457 Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Fri, 9 Feb 2024 21:11:11 +0100 Subject: [PATCH 02/14] Add init.py and iotools.rst entry --- docs/sphinx/source/reference/iotools.rst | 1 + pvlib/iotools/__init__.py | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/sphinx/source/reference/iotools.rst b/docs/sphinx/source/reference/iotools.rst index cf6fbf4ac4..39081220f3 100644 --- a/docs/sphinx/source/reference/iotools.rst +++ b/docs/sphinx/source/reference/iotools.rst @@ -26,6 +26,7 @@ of sources and file formats relevant to solar energy modeling. 
iotools.read_midc_raw_data_from_nrel iotools.read_crn iotools.read_solrad + iotools.get_solrad iotools.get_psm3 iotools.read_psm3 iotools.parse_psm3 diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py index 2ec1753eb9..0fbec16c1f 100644 --- a/pvlib/iotools/__init__.py +++ b/pvlib/iotools/__init__.py @@ -8,6 +8,7 @@ from pvlib.iotools.midc import read_midc_raw_data_from_nrel # noqa: F401 from pvlib.iotools.crn import read_crn # noqa: F401 from pvlib.iotools.solrad import read_solrad # noqa: F401 +from pvlib.iotools.solrad import get_solrad # noqa: F401 from pvlib.iotools.psm3 import get_psm3 # noqa: F401 from pvlib.iotools.psm3 import read_psm3 # noqa: F401 from pvlib.iotools.psm3 import parse_psm3 # noqa: F401 From f43a24d679e4ebd678930acaaf88aada1164612a Mon Sep 17 00:00:00 2001 From: Kevin Anderson Date: Fri, 9 Feb 2024 15:12:57 -0500 Subject: [PATCH 03/14] Apply suggestions from code review Co-authored-by: Adam R. Jensen <39184289+AdamRJensen@users.noreply.github.com> --- pvlib/iotools/solrad.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pvlib/iotools/solrad.py b/pvlib/iotools/solrad.py index f59d783549..bfbeb75c41 100644 --- a/pvlib/iotools/solrad.py +++ b/pvlib/iotools/solrad.py @@ -161,7 +161,7 @@ def get_solrad(station, start, end, -------- >>> # Retrieve two months irradiance data from the ABQ SOLRAD station >>> data, metadata = pvlib.iotools.get_solrad( - >>> start="2020-01-01", end="2020-01-31", station='abq') + >>> station='abq', start="2020-01-01", end="2020-01-31") References ---------- @@ -174,11 +174,9 @@ def get_solrad(station, start, end, # Generate list of filenames dates = pd.date_range(start, end, freq='d', inclusive='both') - years = dates.year - daysofyear = dates.dayofyear filenames = [ - f"{station}/{year}/{station}{year-2000}{doy:03}.dat" - for year, doy in zip(years, daysofyear) + f"{station}/{d.year}/{station}{d.strftime('%y')}{d.dayofyear:03}.dat" + for d in dates ] dfs = [] # Initialize list 
of monthly dataframes From f083cbc73b701fb7eaf6bbfc37cf7d1c92311998 Mon Sep 17 00:00:00 2001 From: Kevin Anderson Date: Fri, 9 Feb 2024 15:22:28 -0500 Subject: [PATCH 04/14] See Also cross-links with read_solrad --- pvlib/iotools/solrad.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pvlib/iotools/solrad.py b/pvlib/iotools/solrad.py index bfbeb75c41..9aa99f9c14 100644 --- a/pvlib/iotools/solrad.py +++ b/pvlib/iotools/solrad.py @@ -65,6 +65,10 @@ def read_solrad(filename): A dataframe with DatetimeIndex and all of the variables in the file. + See Also + -------- + get_solrad + Notes ----- SOLRAD data resolution is described by the README_SOLRAD.txt: @@ -152,6 +156,10 @@ def get_solrad(station, start, end, meta : dict Metadata. + See Also + -------- + read_solrad + Notes ----- Recent SOLRAD data is 1-minute averages. Prior to 2015-01-01, it was From fa9698601b64eb86bc0d4895440e9fe4848747b0 Mon Sep 17 00:00:00 2001 From: Kevin Anderson Date: Fri, 9 Feb 2024 15:22:42 -0500 Subject: [PATCH 05/14] move Examples section below References --- pvlib/iotools/solrad.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pvlib/iotools/solrad.py b/pvlib/iotools/solrad.py index 9aa99f9c14..7a9ba24ce9 100644 --- a/pvlib/iotools/solrad.py +++ b/pvlib/iotools/solrad.py @@ -165,16 +165,16 @@ def get_solrad(station, start, end, Recent SOLRAD data is 1-minute averages. Prior to 2015-01-01, it was 3-minute averages. - Examples - -------- - >>> # Retrieve two months irradiance data from the ABQ SOLRAD station - >>> data, metadata = pvlib.iotools.get_solrad( - >>> station='abq', start="2020-01-01", end="2020-01-31") - References ---------- .. [1] https://gml.noaa.gov/grad/solrad/index.html .. 
[2] https://gml.noaa.gov/aftp/data/radiation/solrad/README_SOLRAD.txt + + Examples + -------- + >>> # Retrieve one month of irradiance data from the ABQ SOLRAD station + >>> data, metadata = pvlib.iotools.get_solrad( + >>> station='abq', start="2020-01-01", end="2020-01-31") """ # Use pd.to_datetime so that strings (e.g. '2021-01-01') are accepted start = pd.to_datetime(start) From 159532d4ec6883bd08a6d724a3f2595b2ea6491b Mon Sep 17 00:00:00 2001 From: Kevin Anderson Date: Fri, 9 Feb 2024 15:23:39 -0500 Subject: [PATCH 06/14] don't use "inclusive" with pd.date_range it is only available from pandas v1.4 onwards, so can't use it in pvlib yet --- pvlib/iotools/solrad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/iotools/solrad.py b/pvlib/iotools/solrad.py index 7a9ba24ce9..c1aa21894e 100644 --- a/pvlib/iotools/solrad.py +++ b/pvlib/iotools/solrad.py @@ -181,7 +181,7 @@ def get_solrad(station, start, end, end = pd.to_datetime(end) # Generate list of filenames - dates = pd.date_range(start, end, freq='d', inclusive='both') + dates = pd.date_range(start.floor('d'), end, freq='d') filenames = [ f"{station}/{d.year}/{station}{d.strftime('%y')}{d.dayofyear:03}.dat" for d in dates From 683816937d1eb7771c8cf6d2bf1b2cfb063a6223 Mon Sep 17 00:00:00 2001 From: Kevin Anderson Date: Fri, 9 Feb 2024 15:23:48 -0500 Subject: [PATCH 07/14] station.lower() --- pvlib/iotools/solrad.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pvlib/iotools/solrad.py b/pvlib/iotools/solrad.py index c1aa21894e..bd0124c818 100644 --- a/pvlib/iotools/solrad.py +++ b/pvlib/iotools/solrad.py @@ -182,6 +182,7 @@ def get_solrad(station, start, end, # Generate list of filenames dates = pd.date_range(start.floor('d'), end, freq='d') + station = station.lower() filenames = [ f"{station}/{d.year}/{station}{d.strftime('%y')}{d.dayofyear:03}.dat" for d in dates From d5ddaf86538ee8187c4da99d56822b3f1ade3e13 Mon Sep 17 00:00:00 2001 From: Kevin Anderson Date: Fri, 9 Feb 2024 
15:23:58 -0500 Subject: [PATCH 08/14] add test --- pvlib/tests/iotools/test_solrad.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pvlib/tests/iotools/test_solrad.py b/pvlib/tests/iotools/test_solrad.py index f8f97af41f..9da48259e5 100644 --- a/pvlib/tests/iotools/test_solrad.py +++ b/pvlib/tests/iotools/test_solrad.py @@ -99,3 +99,24 @@ def test_read_solrad(testfile, index, columns, values, dtypes): expected[col] = expected[col].astype(_dtype) out = solrad.read_solrad(testfile) assert_frame_equal(out, expected) + + +@pytest.mark.remote_data +@pytest.mark.parametrize('testfile, station', [ + (testfile, 'abq'), + (testfile_mad, 'msn'), +]) +def test_get_solrad(testfile, station): + df, meta = solrad.get_solrad(station, "2019-02-25", "2019-02-25") + + assert meta['station'] == station + assert isinstance(meta['filenames'], list) + + assert len(df) == 1440 + assert df.index[0] == pd.to_datetime('2019-02-25 00:00+00:00') + assert df.index[-1] == pd.to_datetime('2019-02-25 23:59+00:00') + + expected = solrad.read_solrad(testfile) + actual = df.reindex(expected.index) + # ABQ test file has an unexplained NaN in row 4; just verify first 3 rows + assert_frame_equal(actual.iloc[:3], expected.iloc[:3]) From fe07d5fb391d8e1eb41d527873d06b9970f0214b Mon Sep 17 00:00:00 2001 From: Kevin Anderson Date: Fri, 9 Feb 2024 15:25:36 -0500 Subject: [PATCH 09/14] whatsnew --- docs/sphinx/source/whatsnew/v0.10.4.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/sphinx/source/whatsnew/v0.10.4.rst b/docs/sphinx/source/whatsnew/v0.10.4.rst index 3cab3fc8ad..fecb3c4613 100644 --- a/docs/sphinx/source/whatsnew/v0.10.4.rst +++ b/docs/sphinx/source/whatsnew/v0.10.4.rst @@ -8,6 +8,8 @@ v0.10.4 (Anticipated March, 2024) Enhancements ~~~~~~~~~~~~ * Added the Huld PV model used by PVGIS (:pull:`1940`) +* Added :py:func:`~pvlib.iotools.get_solrad` for fetching irradiance data from + the SOLRAD ground station network. 
(:pull:`1967`) Bug fixes @@ -33,3 +35,4 @@ Contributors * Cliff Hansen (:ghuser:`cwhanse`) * :ghuser:`matsuobasho` * Adam R. Jensen (:ghuser:`AdamRJensen`) +* Kevin Anderson (:ghuser:`kandersolar`) From 39a658b5d67a1e1eed40ea36d90eb94c0030c687 Mon Sep 17 00:00:00 2001 From: Kevin Anderson Date: Fri, 9 Feb 2024 15:26:58 -0500 Subject: [PATCH 10/14] lint --- pvlib/tests/iotools/test_solrad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/tests/iotools/test_solrad.py b/pvlib/tests/iotools/test_solrad.py index 9da48259e5..4e8e68ac7e 100644 --- a/pvlib/tests/iotools/test_solrad.py +++ b/pvlib/tests/iotools/test_solrad.py @@ -108,7 +108,7 @@ def test_read_solrad(testfile, index, columns, values, dtypes): ]) def test_get_solrad(testfile, station): df, meta = solrad.get_solrad(station, "2019-02-25", "2019-02-25") - + assert meta['station'] == station assert isinstance(meta['filenames'], list) From 71aff105d097bd2c44188d51ebb890bdaa7f3031 Mon Sep 17 00:00:00 2001 From: Kevin Anderson Date: Fri, 9 Feb 2024 15:53:21 -0500 Subject: [PATCH 11/14] add another test for coverage --- pvlib/tests/iotools/test_solrad.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pvlib/tests/iotools/test_solrad.py b/pvlib/tests/iotools/test_solrad.py index 4e8e68ac7e..abedb48cfe 100644 --- a/pvlib/tests/iotools/test_solrad.py +++ b/pvlib/tests/iotools/test_solrad.py @@ -120,3 +120,15 @@ def test_get_solrad(testfile, station): actual = df.reindex(expected.index) # ABQ test file has an unexplained NaN in row 4; just verify first 3 rows assert_frame_equal(actual.iloc[:3], expected.iloc[:3]) + + +@pytest.mark.remote_data +def test_get_solrad_missing_day(): + # data availability begins for ABQ on 2002-02-01 (DOY 32), so requesting + # data before that will raise a warning + message = 'The following file was not found: abq/2002/abq02031.dat' + with pytest.warns(UserWarning, match=message): + df, meta = solrad.get_solrad('abq', '2002-01-31', '2002-02-01') + + # 
but the data for 2002-02-01 is still returned + assert not df.empty From 980af9a32e35727ba2b7c21de6875ee45deb96c3 Mon Sep 17 00:00:00 2001 From: Kevin Anderson Date: Tue, 5 Mar 2024 11:49:59 -0500 Subject: [PATCH 12/14] remove stray empty line --- docs/sphinx/source/whatsnew/v0.10.4.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/sphinx/source/whatsnew/v0.10.4.rst b/docs/sphinx/source/whatsnew/v0.10.4.rst index 006f85828c..c1c977b713 100644 --- a/docs/sphinx/source/whatsnew/v0.10.4.rst +++ b/docs/sphinx/source/whatsnew/v0.10.4.rst @@ -10,7 +10,6 @@ Enhancements * Added the Huld PV model used by PVGIS (:pull:`1940`) * Added :py:func:`~pvlib.iotools.get_solrad` for fetching irradiance data from the SOLRAD ground station network. (:pull:`1967`) - * Added metadata parsing to :py:func:`~pvlib.iotools.read_solrad` to follow the standard iotools convention of returning a tuple of (data, meta). Previously the function only returned a dataframe. (:pull:`1968`) From 21ed90cd299682d3313c20b2094d593e4f788a9e Mon Sep 17 00:00:00 2001 From: Kevin Anderson Date: Tue, 5 Mar 2024 12:07:26 -0500 Subject: [PATCH 13/14] fix broken test --- pvlib/tests/iotools/test_solrad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/tests/iotools/test_solrad.py b/pvlib/tests/iotools/test_solrad.py index 1c4c882882..963014302b 100644 --- a/pvlib/tests/iotools/test_solrad.py +++ b/pvlib/tests/iotools/test_solrad.py @@ -134,7 +134,7 @@ def test_get_solrad(testfile, station): assert df.index[0] == pd.to_datetime('2019-02-25 00:00+00:00') assert df.index[-1] == pd.to_datetime('2019-02-25 23:59+00:00') - expected = solrad.read_solrad(testfile) + expected, _ = solrad.read_solrad(testfile) actual = df.reindex(expected.index) # ABQ test file has an unexplained NaN in row 4; just verify first 3 rows assert_frame_equal(actual.iloc[:3], expected.iloc[:3]) From eb32362c1b2fef5cf71b8089ecc79065ae277467 Mon Sep 17 00:00:00 2001 From: Kevin Anderson Date: Tue, 5 Mar 2024
12:21:38 -0500 Subject: [PATCH 14/14] fix problem with read_solrad for 404 URLs --- pvlib/iotools/solrad.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pvlib/iotools/solrad.py b/pvlib/iotools/solrad.py index b6eae04541..90bf5b666e 100644 --- a/pvlib/iotools/solrad.py +++ b/pvlib/iotools/solrad.py @@ -1,7 +1,6 @@ """Functions to read data from the NOAA SOLRAD network.""" import pandas as pd -import urllib import warnings import requests import io @@ -110,6 +109,7 @@ def read_solrad(filename): if str(filename).startswith('ftp') or str(filename).startswith('http'): response = requests.get(filename) + response.raise_for_status() file_buffer = io.StringIO(response.content.decode()) else: with open(str(filename), 'r') as file_buffer: @@ -207,7 +207,7 @@ def get_solrad(station, start, end, try: dfi, file_metadata = read_solrad(url + f) dfs.append(dfi) - except urllib.error.HTTPError: + except requests.exceptions.HTTPError: warnings.warn(f"The following file was not found: {f}") data = pd.concat(dfs, axis='rows')