From 9f8e256909b3c814f69ff96a6fe7a1161d62a73c Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Wed, 5 Jan 2022 23:57:39 +0100 Subject: [PATCH 01/19] Add map_variables argument to read_crn --- pvlib/iotools/crn.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index c3b0e8776f..3f5524221a 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -40,7 +40,7 @@ ] -def read_crn(filename): +def read_crn(filename, map_variables=True): """ Read a NOAA USCRN fixed-width file into pandas dataframe. The CRN is described in [1]_ and [2]_. @@ -49,6 +49,9 @@ def read_crn(filename): ---------- filename: str, path object, or file-like filepath or url to read for the fixed-width file. + map_variables: bool, default: True + When true, renames columns of the Dataframe to pvlib variable names + where applicable. See variable VARIABLE_MAP. Returns ------- @@ -116,6 +119,7 @@ def read_crn(filename): # consider replacing with .replace([-99, -999, -9999]) data = data.where(data != val, np.nan) - data = data.rename(columns=VARIABLE_MAP) + if map_variables: + data = data.rename(columns=VARIABLE_MAP) return data From a10fe285a9f98882a01330e8397ce0f803710f5c Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Thu, 6 Jan 2022 01:17:55 +0100 Subject: [PATCH 02/19] Add test coverage for map_variables --- pvlib/tests/iotools/test_crn.py | 43 +++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/pvlib/tests/iotools/test_crn.py b/pvlib/tests/iotools/test_crn.py index f974e8961f..1434b8d839 100644 --- a/pvlib/tests/iotools/test_crn.py +++ b/pvlib/tests/iotools/test_crn.py @@ -7,7 +7,7 @@ @pytest.fixture -def columns(): +def columns_mapped(): return [ 'WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME', 'CRX_VN', 'longitude', 'latitude', 'temp_air', 'PRECIPITATION', 'ghi', @@ -17,6 +17,16 @@ def columns(): 'WETNESS', 'WET_FLAG', 'wind_speed', 'wind_speed_flag'] +@pytest.fixture +def columns_unmapped(): + return [ + 'WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME', 'CRX_VN', + 'LONGITUDE', 'LATITUDE', 'AIR_TEMPERATURE', 'PRECIPITATION', + 'SOLAR_RADIATION', 'SR_FLAG', 'SURFACE_TEMPERATURE', 'ST_TYPE', + 'ST_FLAG', 'RELATIVE_HUMIDITY', 'RH_FLAG', 'SOIL_MOISTURE_5', + 'SOIL_TEMPERATURE_5', 'WETNESS', 'WET_FLAG', 'WIND_1_5', 'WIND_FLAG'] + + @pytest.fixture def dtypes(): return [ @@ -39,7 +49,7 @@ def testfile_problems(): return DATA_DIR / 'CRN_with_problems.txt' -def test_read_crn(testfile, columns, dtypes): +def test_read_crn(testfile, columns_mapped, dtypes): index = pd.DatetimeIndex(['2019-01-01 16:10:00', '2019-01-01 16:15:00', '2019-01-01 16:20:00', @@ -54,14 +64,37 @@ def test_read_crn(testfile, columns, dtypes): 0.0, 340.0, 0, 4.3, 'C', 0, 83.0, 0, nan, nan, 1183, 0, 0.53, 0], [53131, 20190101, 1625, 20190101, 925, 3, -111.17, 32.24, 4.0, 0.0, 393.0, 0, 4.8, 'C', 0, 81.0, 0, nan, nan, 1223, 0, 0.64, 0]]) - expected = pd.DataFrame(values, columns=columns, index=index) + expected = pd.DataFrame(values, columns=columns_mapped, index=index) for (col, _dtype) in zip(expected.columns, dtypes): expected[col] = expected[col].astype(_dtype) out = crn.read_crn(testfile) assert_frame_equal(out, expected) -def test_read_crn_problems(testfile_problems, columns, dtypes): +# Test map_variables=False +def test_read_crn_map_variables(testfile, columns_unmapped, dtypes): + index = pd.DatetimeIndex(['2019-01-01 16:10:00', + '2019-01-01 16:15:00', + '2019-01-01 16:20:00', + '2019-01-01 16:25:00'], + freq=None).tz_localize('UTC') + values = np.array([ + [53131, 20190101, 1610, 20190101, 910, 3, -111.17, 32.24, nan, + 0.0, 296.0, 0, 4.4, 'C', 0, 90.0, 0, nan, nan, 24, 0, 0.78, 0], + [53131, 20190101, 1615, 20190101, 915, 3, -111.17, 32.24, 3.3, + 0.0, 183.0, 0, 4.0, 'C', 0, 87.0, 0, nan, nan, 1182, 0, 0.36, 0], + [53131, 20190101, 1620, 20190101, 920, 3, -111.17, 32.24, 3.5, + 0.0, 340.0, 0, 4.3, 'C', 0, 83.0, 0, nan, nan, 1183, 0, 0.53, 0], + [53131, 20190101, 1625, 20190101, 925, 3, -111.17, 32.24, 4.0, + 0.0, 393.0, 0, 4.8, 'C', 0, 81.0, 0, nan, nan, 1223, 0, 0.64, 0]]) + expected = pd.DataFrame(values, columns=columns_unmapped, index=index) + for (col, _dtype) in zip(expected.columns, dtypes): + expected[col] = expected[col].astype(_dtype) + out = crn.read_crn(testfile, map_variables=False) + assert_frame_equal(out, expected) + + +def test_read_crn_problems(testfile_problems, columns_mapped, dtypes): # GH1025 index = pd.DatetimeIndex(['2020-07-06 12:00:00', '2020-07-06 13:10:00'], @@ -72,7 +105,7 @@ def test_read_crn_problems(testfile_problems, columns, dtypes): [92821, 20200706, 1310, 20200706, 810, '2.623', -80.69, 28.62, 26.9, 0.0, 430.0, 0, 30.2, 'C', 0, 87.0, 0, nan, nan, 989, 0, 1.64, 0]]) - expected = pd.DataFrame(values, columns=columns, index=index) + expected = pd.DataFrame(values, columns=columns_mapped, index=index) for (col, _dtype) in zip(expected.columns, dtypes): expected[col] = expected[col].astype(_dtype) out = crn.read_crn(testfile_problems) From 456cb717906318ad1f34de95b5ad05c2a394d17d Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Fri, 7 Jan 2022 01:12:20 +0100 Subject: [PATCH 03/19] Update whatsnew --- docs/sphinx/source/whatsnew/v0.9.1.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/sphinx/source/whatsnew/v0.9.1.rst b/docs/sphinx/source/whatsnew/v0.9.1.rst index 64fc94cfa1..f29d4a3bd8 100644 --- a/docs/sphinx/source/whatsnew/v0.9.1.rst +++ b/docs/sphinx/source/whatsnew/v0.9.1.rst @@ -11,6 +11,7 @@ Deprecations Enhancements ~~~~~~~~~~~~ +* Added ``map_variables`` option to :func:`~pvlib.iotools.read_crn` (:pull:`1368`) Bug fixes ~~~~~~~~~ From 6bc69500332fd9a9b36ae5b114fe7f21fd7680ed Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Fri, 7 Jan 2022 01:25:33 +0100 Subject: [PATCH 04/19] Remove unnecessary tz_localize --- pvlib/iotools/crn.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index 3f5524221a..61c3e8d24c 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -106,11 +106,6 @@ def read_crn(filename, map_variables=True): dtindex = pd.to_datetime(dts['UTC_DATE'] + dts['UTC_TIME'].str.zfill(4), format='%Y%m%d%H%M', utc=True) data = data.set_index(dtindex) - try: - # to_datetime(utc=True) does not work in older versions of pandas - data = data.tz_localize('UTC') - except TypeError: - pass # Now we can set nans. This could be done a per column basis to be # safer, since in principle a real -99 value could occur in a -9999 From 106030a744731045e748579b41586e7d3f3ccf57 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Fri, 7 Jan 2022 01:27:00 +0100 Subject: [PATCH 05/19] Replace nans with .replace instead of .where --- pvlib/iotools/crn.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index 61c3e8d24c..68fbf19074 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -110,9 +110,7 @@ def read_crn(filename, map_variables=True): # Now we can set nans. This could be done a per column basis to be # safer, since in principle a real -99 value could occur in a -9999 # column. Very unlikely to see that in the real world. - for val in [-99, -999, -9999]: - # consider replacing with .replace([-99, -999, -9999]) - data = data.where(data != val, np.nan) + data = data.replace([-99, -999, -9999], np.nan) if map_variables: data = data.rename(columns=VARIABLE_MAP) From 1fcaa9773cb3e63f8874d6771ab9194b6edaeec2 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Fri, 7 Jan 2022 02:28:24 +0100 Subject: [PATCH 06/19] Extend documentation --- pvlib/iotools/crn.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index 68fbf19074..b29b423a19 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -41,9 +41,13 @@ def read_crn(filename, map_variables=True): - """ - Read a NOAA USCRN fixed-width file into pandas dataframe. The CRN is - described in [1]_ and [2]_. + """Read a NOAA USCRN fixed-width file into a pandas dataframe. + + The CRN network consists of a +100 meteorological stations covering the + U.S. and is described in [1]_ and [2]_. The primary goal of CRN is to + provide long-term measurements of temperature, precipitation, and soil + moisture and temperature. Additionally, global horizontal irradiance (GHI) + is measured using a photodiode pyranometer. Parameters ---------- @@ -63,10 +67,10 @@ def read_crn(filename, map_variables=True): ----- CRN files contain 5 minute averages labeled by the interval ending time. Here, missing data is flagged as NaN, rather than the lowest - possible integer for a field (e.g. -999 or -99). Air temperature in - deg C. Wind speed in m/s at a height of 1.5 m above ground level. + possible integer for a field (e.g. -999 or -99). Air temperature is in + deg C and wind speed is in m/s at a height of 1.5 m above ground level. - Variables corresponding to standard pvlib variables are renamed, + Variables corresponding to standard pvlib variables are by default renamed, e.g. `SOLAR_RADIATION` becomes `ghi`. See the `pvlib.iotools.crn.VARIABLE_MAP` dict for the complete mapping. From 02fbfbb747dbd7be2d309d3a8d11712b704fbb14 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Fri, 7 Jan 2022 02:28:58 +0100 Subject: [PATCH 07/19] Simply test coverage of map_variables --- pvlib/tests/iotools/test_crn.py | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/pvlib/tests/iotools/test_crn.py b/pvlib/tests/iotools/test_crn.py index 1434b8d839..b3119cec39 100644 --- a/pvlib/tests/iotools/test_crn.py +++ b/pvlib/tests/iotools/test_crn.py @@ -71,27 +71,10 @@ def test_read_crn(testfile, columns_mapped, dtypes): assert_frame_equal(out, expected) -# Test map_variables=False +# Test map_variables=False returns correct column names def test_read_crn_map_variables(testfile, columns_unmapped, dtypes): - index = pd.DatetimeIndex(['2019-01-01 16:10:00', - '2019-01-01 16:15:00', - '2019-01-01 16:20:00', - '2019-01-01 16:25:00'], - freq=None).tz_localize('UTC') - values = np.array([ - [53131, 20190101, 1610, 20190101, 910, 3, -111.17, 32.24, nan, - 0.0, 296.0, 0, 4.4, 'C', 0, 90.0, 0, nan, nan, 24, 0, 0.78, 0], - [53131, 20190101, 1615, 20190101, 915, 3, -111.17, 32.24, 3.3, - 0.0, 183.0, 0, 4.0, 'C', 0, 87.0, 0, nan, nan, 1182, 0, 0.36, 0], - [53131, 20190101, 1620, 20190101, 920, 3, -111.17, 32.24, 3.5, - 0.0, 340.0, 0, 4.3, 'C', 0, 83.0, 0, nan, nan, 1183, 0, 0.53, 0], - [53131, 20190101, 1625, 20190101, 925, 3, -111.17, 32.24, 4.0, - 0.0, 393.0, 0, 4.8, 'C', 0, 81.0, 0, nan, nan, 1223, 0, 0.64, 0]]) - expected = pd.DataFrame(values, columns=columns_unmapped, index=index) - for (col, _dtype) in zip(expected.columns, dtypes): - expected[col] = expected[col].astype(_dtype) out = crn.read_crn(testfile, map_variables=False) - assert_frame_equal(out, expected) + assert (out.columns == columns_unmapped).all() def test_read_crn_problems(testfile_problems, columns_mapped, dtypes): From 427fa38dd343a49ac0844fc43d8ca7dcd7f9a9d9 Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Fri, 7 Jan 2022 14:19:39 +0100 Subject: [PATCH 08/19] Use assert_index_equal instead of assert Co-authored-by: Kevin Anderson <57452607+kanderso-nrel@users.noreply.github.com> --- pvlib/tests/iotools/test_crn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/tests/iotools/test_crn.py b/pvlib/tests/iotools/test_crn.py index b3119cec39..586ea91c6e 100644 --- a/pvlib/tests/iotools/test_crn.py +++ b/pvlib/tests/iotools/test_crn.py @@ -74,7 +74,7 @@ def test_read_crn(testfile, columns_mapped, dtypes): # Test map_variables=False returns correct column names def test_read_crn_map_variables(testfile, columns_unmapped, dtypes): out = crn.read_crn(testfile, map_variables=False) - assert (out.columns == columns_unmapped).all() + assert_index_equal(out.columns, pd.Index(columns_unmapped)) def test_read_crn_problems(testfile_problems, columns_mapped, dtypes): From ebe5510fd9d897a1b1b33bc52dec4c6271ec2208 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Fri, 7 Jan 2022 14:23:28 +0100 Subject: [PATCH 09/19] Add import of assert_index_equal --- pvlib/tests/iotools/test_crn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/tests/iotools/test_crn.py b/pvlib/tests/iotools/test_crn.py index 586ea91c6e..8e1891918a 100644 --- a/pvlib/tests/iotools/test_crn.py +++ b/pvlib/tests/iotools/test_crn.py @@ -3,7 +3,7 @@ from numpy import dtype, nan import pytest from pvlib.iotools import crn -from ..conftest import DATA_DIR, assert_frame_equal +from ..conftest import DATA_DIR, assert_frame_equal, assert_index_equal @pytest.fixture From a4099150421e93dcaa57a2351969336dac793677 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Sat, 8 Jan 2022 00:39:01 +0100 Subject: [PATCH 10/19] Add -99999 and -999999 to nan values --- pvlib/iotools/crn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index b29b423a19..530cc1ce5b 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -114,7 +114,7 @@ def read_crn(filename, map_variables=True): # Now we can set nans. This could be done a per column basis to be # safer, since in principle a real -99 value could occur in a -9999 # column. Very unlikely to see that in the real world. - data = data.replace([-99, -999, -9999], np.nan) + data = data.replace([-99, -999, -9999, -99999, -999999], np.nan) if map_variables: data = data.rename(columns=VARIABLE_MAP) From 81ead2d9f05fca5d239657843a9fa81cf6ec996d Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Sat, 8 Jan 2022 12:10:48 +0100 Subject: [PATCH 11/19] Add -99999 nan bug to whatsnew --- docs/sphinx/source/whatsnew/v0.9.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sphinx/source/whatsnew/v0.9.1.rst b/docs/sphinx/source/whatsnew/v0.9.1.rst index f29d4a3bd8..6391abb778 100644 --- a/docs/sphinx/source/whatsnew/v0.9.1.rst +++ b/docs/sphinx/source/whatsnew/v0.9.1.rst @@ -23,7 +23,7 @@ Bug fixes argument was not being passed to the ``optimalinclination`` request parameter (:pull:`1356`) * Fixed bug in :py:func:`pvlib.bifacial.pvfactors_timeseries` where scalar ``surface_tilt`` and ``surface_azimuth`` inputs caused an error (:issue:`1127`, :issue:`1332`, :pull:`1361`) - +* Added -99999 to list of nan characters in :func:`~pvlib.iotools.read_crn` (:issue:`1372`, :pull:`1368`) Testing ~~~~~~~ From ee56796d983b61eee16f29fb3afedad3fc4702d5 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Mon, 10 Jan 2022 22:59:30 +0100 Subject: [PATCH 12/19] Add -99999 to test file --- pvlib/data/CRN_with_problems.txt | Bin 2025 -> 2025 bytes pvlib/tests/iotools/test_crn.py | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pvlib/data/CRN_with_problems.txt b/pvlib/data/CRN_with_problems.txt index f6bdfc54d6747e86d45f2081112191405532959a..25023dcaee8b1b317c41708d60a28e40788f5cba 100644 GIT binary patch literal 2025 zcmeH|I}XDz3`6&xf)|J&%b!Sd?|-M#c7OzJ&@LUV!8T<*rFH4yj7`q&Ua+q{1oM`F~ngYLdSoSxBD}j~3 zN?;}MhXmpcZCUFDZB7Lv$?nnC#D|5BHdfICZMioNv^mLjhc@>EZKV)xA2!mJuuk>W V#!1+2BTs6AHteU3X*o&jHeL;V5NZGb literal 2025 zcmeH|I}QUe2t-ro6kK2t;Lk$6_rFudA1Rw;l{6_$1B0+Nn&ySMDVdnaE|nDhsjw32 zm54SMt^sAv)!NoFH%%nm&oqSkV1)}EV~&KrMX=y6^fuv1%vwZx5_yKZ67xDOjw8dYU72l RIZeXChW)fLEGyS$;{nKrMT-Cc diff --git a/pvlib/tests/iotools/test_crn.py b/pvlib/tests/iotools/test_crn.py index 8e1891918a..ddaac455f8 100644 --- a/pvlib/tests/iotools/test_crn.py +++ b/pvlib/tests/iotools/test_crn.py @@ -83,8 +83,8 @@ def test_read_crn_problems(testfile_problems, columns_mapped, dtypes): '2020-07-06 13:10:00'], freq=None).tz_localize('UTC') values = np.array([ - [92821, 20200706, 1200, 20200706, 700, '3', -80.69, 28.62, 24.9, - 0.0, 190.0, 0, 25.5, 'C', 0, 93.0, 0, nan, nan, 990, 0, 1.57, 0], + [92821, 20200706, 1200, 20200706, 700, 3.0, -80.69, 28.62, 24.9, + 0.0, np.nan, 0, 25.5, 'C', 0, 93.0, 0, nan, nan, 990, 0, 1.57, 0], [92821, 20200706, 1310, 20200706, 810, '2.623', -80.69, 28.62, 26.9, 0.0, 430.0, 0, 30.2, 'C', 0, 87.0, 0, nan, nan, 989, 0, 1.64, 0]]) From b041b8e375642b9fa549a0d1a11f80a6d970faa4 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Mon, 10 Jan 2022 22:59:44 +0100 Subject: [PATCH 13/19] Update doc and whatsnew --- docs/sphinx/source/whatsnew/v0.9.1.rst | 3 ++- pvlib/iotools/crn.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/sphinx/source/whatsnew/v0.9.1.rst b/docs/sphinx/source/whatsnew/v0.9.1.rst index 6391abb778..880df071ec 100644 --- a/docs/sphinx/source/whatsnew/v0.9.1.rst +++ b/docs/sphinx/source/whatsnew/v0.9.1.rst @@ -23,7 +23,8 @@ Bug fixes argument was not being passed to the ``optimalinclination`` request parameter (:pull:`1356`) * Fixed bug in :py:func:`pvlib.bifacial.pvfactors_timeseries` where scalar ``surface_tilt`` and ``surface_azimuth`` inputs caused an error (:issue:`1127`, :issue:`1332`, :pull:`1361`) -* Added -99999 to list of nan characters in :func:`~pvlib.iotools.read_crn` (:issue:`1372`, :pull:`1368`) +* Added -99999 to list of values to map to nan in :func:`~pvlib.iotools.read_crn` + (:issue:`1372`, :pull:`1368`) Testing ~~~~~~~ diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index 530cc1ce5b..ec1ec60735 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -43,11 +43,11 @@ def read_crn(filename, map_variables=True): """Read a NOAA USCRN fixed-width file into a pandas dataframe. - The CRN network consists of a +100 meteorological stations covering the + The CRN network consists of over 100 meteorological stations covering the U.S. and is described in [1]_ and [2]_. The primary goal of CRN is to provide long-term measurements of temperature, precipitation, and soil moisture and temperature. Additionally, global horizontal irradiance (GHI) - is measured using a photodiode pyranometer. + is measured at each site using a photodiode pyranometer. Parameters ---------- From a75337756614c74f8abbdcb8b43ae6f5248ba36e Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Mon, 10 Jan 2022 23:07:25 +0100 Subject: [PATCH 14/19] Remove -999999 from list of nans --- pvlib/iotools/crn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index ec1ec60735..fcae38fcb3 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -114,7 +114,7 @@ def read_crn(filename, map_variables=True): # Now we can set nans. This could be done a per column basis to be # safer, since in principle a real -99 value could occur in a -9999 # column. Very unlikely to see that in the real world. - data = data.replace([-99, -999, -9999, -99999, -999999], np.nan) + data = data.replace([-99, -999, -9999, -99999], np.nan) if map_variables: data = data.rename(columns=VARIABLE_MAP) From a66f71d5cc650f708830eaf994b3a669fc5df6f8 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Tue, 11 Jan 2022 01:32:00 +0100 Subject: [PATCH 15/19] Minor doc update --- pvlib/iotools/crn.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index fcae38fcb3..c8a46f5ddc 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -12,7 +12,7 @@ 'SOIL_MOISTURE_5 SOIL_TEMPERATURE_5 WETNESS WET_FLAG WIND_1_5 WIND_FLAG' ) -VARIABLE_MAP = { +CRN_VARIABLE_MAP = { 'LONGITUDE': 'longitude', 'LATITUDE': 'latitude', 'AIR_TEMPERATURE': 'temp_air', @@ -53,9 +53,9 @@ def read_crn(filename, map_variables=True): ---------- filename: str, path object, or file-like filepath or url to read for the fixed-width file. - map_variables: bool, default: True + map_variables: boolean, default: True When true, renames columns of the Dataframe to pvlib variable names - where applicable. See variable VARIABLE_MAP. + where applicable. See variable :const:`CRN_VARIABLE_MAP`. Returns ------- @@ -72,7 +72,7 @@ def read_crn(filename, map_variables=True): Variables corresponding to standard pvlib variables are by default renamed, e.g. `SOLAR_RADIATION` becomes `ghi`. See the - `pvlib.iotools.crn.VARIABLE_MAP` dict for the complete mapping. + :const:`pvlib.iotools.crn.CRN_VARIABLE_MAP` dict for the complete mapping. CRN files occasionally have a set of null characters on a line instead of valid data. This function drops those lines. Sometimes @@ -117,6 +117,6 @@ def read_crn(filename, map_variables=True): data = data.replace([-99, -999, -9999, -99999], np.nan) if map_variables: - data = data.rename(columns=VARIABLE_MAP) + data = data.rename(columns=CRN_VARIABLE_MAP) return data From 457fd5b088969aa833e0f3f394f22daecf6bc900 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Wed, 12 Jan 2022 01:40:50 +0100 Subject: [PATCH 16/19] Add dictionary of nan values --- pvlib/iotools/crn.py | 30 +++++++++++++++++++----------- pvlib/tests/iotools/test_crn.py | 2 +- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index c8a46f5ddc..d637e29406 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -24,6 +24,21 @@ 'WIND_FLAG': 'wind_speed_flag' } +NAN_DICT = { + 'CRX_VN': -99999, + 'AIR_TEMPERATURE': -9999, + 'PRECIPITATION': -9999, + 'SOLAR_RADIATION': -99999, + 'SURFACE_TEMPERATURE': -9999, + 'RELATIVE_HUMIDITY': -9999, + 'SOIL_MOISTURE_5': -99, + 'SOIL_TEMPERATURE_5': -9999, + 'WETNESS': -9999, + 'WIND_1_5': -99} + +# Add NUL characters to possible NaN values for all columns +NAN_DICT = {k: [v, '\x00\x00\x00\x00\x00\x00'] for k, v in NAN_DICT.items()} + # as specified in CRN README.txt file. excludes 1 space between columns WIDTHS = [5, 8, 4, 8, 4, 6, 7, 7, 7, 7, 6, 1, 7, 1, 1, 5, 1, 7, 7, 5, 1, 6, 1] # add 1 to make fields contiguous (required by pandas.read_fwf) @@ -92,13 +107,11 @@ def read_crn(filename, map_variables=True): Amer. Meteor. Soc., 94, 489-498. :doi:`10.1175/BAMS-D-12-00170.1` """ - # read in data. set fields with NUL characters to NaN + # read in data data = pd.read_fwf(filename, header=None, names=HEADERS.split(' '), - widths=WIDTHS, na_values=['\x00\x00\x00\x00\x00\x00']) - # at this point we only have NaNs from NUL characters, not -999 etc. - # these bad rows need to be removed so that dtypes can be set. - # NaNs require float dtype so we run into errors if we don't do this. - data = data.dropna(axis=0) + widths=WIDTHS, na_values=NAN_DICT) + # Remove rows with all nans + data = data.dropna(axis=0, how='all') # loop here because dtype kwarg not supported in read_fwf until 0.20 for (col, _dtype) in zip(data.columns, DTYPES): data[col] = data[col].astype(_dtype) @@ -111,11 +124,6 @@ def read_crn(filename, map_variables=True): format='%Y%m%d%H%M', utc=True) data = data.set_index(dtindex) - # Now we can set nans. This could be done a per column basis to be - # safer, since in principle a real -99 value could occur in a -9999 - # column. Very unlikely to see that in the real world. - data = data.replace([-99, -999, -9999, -99999], np.nan) - if map_variables: data = data.rename(columns=CRN_VARIABLE_MAP) diff --git a/pvlib/tests/iotools/test_crn.py b/pvlib/tests/iotools/test_crn.py index ddaac455f8..b19888dda1 100644 --- a/pvlib/tests/iotools/test_crn.py +++ b/pvlib/tests/iotools/test_crn.py @@ -83,7 +83,7 @@ def test_read_crn_problems(testfile_problems, columns_mapped, dtypes): '2020-07-06 13:10:00'], freq=None).tz_localize('UTC') values = np.array([ - [92821, 20200706, 1200, 20200706, 700, 3.0, -80.69, 28.62, 24.9, + [92821, 20200706, 1200, 20200706, 700, '3.0', -80.69, 28.62, 24.9, 0.0, np.nan, 0, 25.5, 'C', 0, 93.0, 0, nan, nan, 990, 0, 1.57, 0], [92821, 20200706, 1310, 20200706, 810, '2.623', -80.69, 28.62, 26.9, 0.0, 430.0, 0, 30.2, 'C', 0, 87.0, 0, nan, nan, 989, 0, From 999738be529d8cf02b3f32a749b30bf0fda6ca5f Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Wed, 12 Jan 2022 01:43:55 +0100 Subject: [PATCH 17/19] Change CRN_VARIABLE_MAP back to VARIABLE_MAP --- pvlib/iotools/crn.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index d637e29406..c833ce4547 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -12,7 +12,7 @@ 'SOIL_MOISTURE_5 SOIL_TEMPERATURE_5 WETNESS WET_FLAG WIND_1_5 WIND_FLAG' ) -CRN_VARIABLE_MAP = { +VARIABLE_MAP = { 'LONGITUDE': 'longitude', 'LATITUDE': 'latitude', 'AIR_TEMPERATURE': 'temp_air', @@ -70,7 +70,7 @@ def read_crn(filename, map_variables=True): filepath or url to read for the fixed-width file. map_variables: boolean, default: True When true, renames columns of the Dataframe to pvlib variable names - where applicable. See variable :const:`CRN_VARIABLE_MAP`. + where applicable. See variable :const:`VARIABLE_MAP`. Returns ------- @@ -87,7 +87,7 @@ def read_crn(filename, map_variables=True): Variables corresponding to standard pvlib variables are by default renamed, e.g. `SOLAR_RADIATION` becomes `ghi`. See the - :const:`pvlib.iotools.crn.CRN_VARIABLE_MAP` dict for the complete mapping. + :const:`pvlib.iotools.crn.VARIABLE_MAP` dict for the complete mapping. CRN files occasionally have a set of null characters on a line instead of valid data. This function drops those lines. Sometimes @@ -125,6 +125,6 @@ def read_crn(filename, map_variables=True): data = data.set_index(dtindex) if map_variables: - data = data.rename(columns=CRN_VARIABLE_MAP) + data = data.rename(columns=VARIABLE_MAP) return data From 667f243911dd4b139673f8e0764ef76b8df408e2 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Wed, 12 Jan 2022 01:59:31 +0100 Subject: [PATCH 18/19] Remove numpy import --- pvlib/iotools/crn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index c833ce4547..8d779e50cb 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -2,7 +2,6 @@ """ import pandas as pd -import numpy as np HEADERS = ( From a07acbc6ce5bb1a4b3ea51532f58a4981be634d4 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Wed, 12 Jan 2022 02:12:57 +0100 Subject: [PATCH 19/19] Reformat setting of dtypes --- pvlib/iotools/crn.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index 8d779e50cb..fe46bb69d2 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -4,12 +4,12 @@ import pandas as pd -HEADERS = ( - 'WBANNO UTC_DATE UTC_TIME LST_DATE LST_TIME CRX_VN LONGITUDE LATITUDE ' - 'AIR_TEMPERATURE PRECIPITATION SOLAR_RADIATION SR_FLAG ' - 'SURFACE_TEMPERATURE ST_TYPE ST_FLAG RELATIVE_HUMIDITY RH_FLAG ' - 'SOIL_MOISTURE_5 SOIL_TEMPERATURE_5 WETNESS WET_FLAG WIND_1_5 WIND_FLAG' -) +HEADERS = [ + 'WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME', 'CRX_VN', + 'LONGITUDE', 'LATITUDE', 'AIR_TEMPERATURE', 'PRECIPITATION', + 'SOLAR_RADIATION', 'SR_FLAG', 'SURFACE_TEMPERATURE', 'ST_TYPE', 'ST_FLAG', + 'RELATIVE_HUMIDITY', 'RH_FLAG', 'SOIL_MOISTURE_5', 'SOIL_TEMPERATURE_5', + 'WETNESS', 'WET_FLAG', 'WIND_1_5', 'WIND_FLAG'] VARIABLE_MAP = { 'LONGITUDE': 'longitude', @@ -107,13 +107,12 @@ def read_crn(filename, map_variables=True): """ # read in data - data = pd.read_fwf(filename, header=None, names=HEADERS.split(' '), - widths=WIDTHS, na_values=NAN_DICT) + data = pd.read_fwf(filename, header=None, names=HEADERS, widths=WIDTHS, + na_values=NAN_DICT) # Remove rows with all nans data = data.dropna(axis=0, how='all') - # loop here because dtype kwarg not supported in read_fwf until 0.20 - for (col, _dtype) in zip(data.columns, DTYPES): - data[col] = data[col].astype(_dtype) + # set dtypes here because dtype kwarg not supported in read_fwf until 0.20 + data = data.astype(dict(zip(HEADERS, DTYPES))) # set index # UTC_TIME does not have leading 0s, so must zfill(4) to comply