Add encoding parameter to read_tmy3 by AdamRJensen · Pull Request #1737 · pvlib/pvlib-python · GitHub
[go: up one dir, main page]

Skip to content

Add encoding parameter to read_tmy3 #1737

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/sphinx/source/whatsnew/v0.9.6.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ Deprecations

Enhancements
~~~~~~~~~~~~
* Add optional encoding parameter to :py:func:`pvlib.iotools.read_tmy3`.
(:issue:`1732`, :pull:`1737`)
* Added function to retrieve horizon data from PVGIS
:py:func:`pvlib.iotools.get_pvgis_horizon`. (:issue:`1290`, :pull:`1395`)
* Added ``map_variables`` argument to the :py:func:`pvlib.iotools.read_tmy3` in
Expand Down Expand Up @@ -74,5 +76,6 @@ Contributors
* Siddharth Kaul (:ghuser:`k10blogger`)
* Kshitiz Gupta (:ghuser:`kshitiz305`)
* Stefan de Lange (:ghuser:`langestefan`)
* Andy Lam (:ghuser:`andylam598`)
* :ghuser:`ooprathamm`
* Kevin Anderson (:ghuser:`kandersolar`)
43 changes: 21 additions & 22 deletions pvlib/iotools/tmy.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
}


def read_tmy3(filename, coerce_year=None, map_variables=None, recolumn=None):
def read_tmy3(filename, coerce_year=None, map_variables=None, recolumn=None,
encoding=None):
"""Read a TMY3 file into a pandas dataframe.

Note that values contained in the metadata dictionary are unchanged
Expand All @@ -50,6 +51,11 @@ def read_tmy3(filename, coerce_year=None, map_variables=None, recolumn=None):
If ``True``, apply standard names to TMY3 columns. Typically this
results in stripping the units from the column name.
Cannot be used in combination with ``map_variables``.
encoding : str, optional
Encoding of the file. For files that contain non-UTF8 characters it may
be necessary to specify an alternative encoding, e.g., for
SolarAnywhere TMY3 files the encoding should be 'iso-8859-1'. Users
may also consider using the 'utf-8-sig' encoding.

Returns
-------
Expand All @@ -58,7 +64,7 @@ def read_tmy3(filename, coerce_year=None, map_variables=None, recolumn=None):
data : DataFrame
A pandas dataframe with the columns described in the table
below. For more detailed descriptions of each component, please
consult the TMY3 User's Manual ([1]_), especially tables 1-1
consult the TMY3 User's Manual [1]_, especially tables 1-1
through 1-6.

metadata : dict
Expand Down Expand Up @@ -187,14 +193,12 @@ def read_tmy3(filename, coerce_year=None, map_variables=None, recolumn=None):
""" # noqa: E501
head = ['USAF', 'Name', 'State', 'TZ', 'latitude', 'longitude', 'altitude']

try:
with open(str(filename), 'r') as fbuf:
firstline, data = _parse_tmy3(fbuf)
# SolarAnywhere files contain non-UTF8 characters and may require
# encoding='iso-8859-1' in order to be parsed
except UnicodeDecodeError:
with open(str(filename), 'r', encoding='iso-8859-1') as fbuf:
firstline, data = _parse_tmy3(fbuf)
with open(str(filename), 'r', encoding=encoding) as fbuf:
# header information on the 1st line (0 indexing)
firstline = fbuf.readline()
# use pandas to read the csv file buffer
# header is actually the second line of the file, but tell pandas to look
# for it on line 0 because readline() above already consumed the first line
data = pd.read_csv(fbuf, header=0)

meta = dict(zip(head, firstline.rstrip('\n').split(",")))
# convert metadata strings to numeric types
Expand All @@ -206,8 +210,10 @@ def read_tmy3(filename, coerce_year=None, map_variables=None, recolumn=None):

# get the date column as a pd.Series of numpy datetime64
data_ymd = pd.to_datetime(data['Date (MM/DD/YYYY)'], format='%m/%d/%Y')
# extract minutes
minutes = data['Time (HH:MM)'].str.split(':').str[1].astype(int)
# shift the time column so that midnight is 00:00 instead of 24:00
shifted_hour = data['Time (HH:MM)'].str[:2].astype(int) % 24
shifted_hour = data['Time (HH:MM)'].str.split(':').str[0].astype(int) % 24
# shift the dates at midnight (24:00) so they correspond to the next day.
# If midnight is specified as 00:00 do not shift date.
data_ymd[data['Time (HH:MM)'].str[:2] == '24'] += datetime.timedelta(days=1) # noqa: E501
Expand All @@ -225,7 +231,8 @@ def read_tmy3(filename, coerce_year=None, map_variables=None, recolumn=None):
data_ymd.iloc[-1] = data_ymd.iloc[-1].replace(year=coerce_year+1)
# NOTE: as of pvlib-0.6.3, min req is pandas-0.18.1, so pd.to_timedelta
# unit must be in (D,h,m,s,ms,us,ns), but pandas>=0.24 allows unit='hour'
data.index = data_ymd + pd.to_timedelta(shifted_hour, unit='h')
data.index = data_ymd + pd.to_timedelta(shifted_hour, unit='h') \
+ pd.to_timedelta(minutes, unit='min')
# shouldn't specify both recolumn and map_variables
if recolumn is not None and map_variables is not None:
msg = "`map_variables` and `recolumn` cannot both be specified"
Expand All @@ -252,15 +259,6 @@ def read_tmy3(filename, coerce_year=None, map_variables=None, recolumn=None):
return data, meta


def _parse_tmy3(fbuf):
# header information on the 1st line (0 indexing)
firstline = fbuf.readline()
# use pandas to read the csv file buffer
# header is actually the second line of the file, but tell pandas to look
# for it on line 0 because readline() above already consumed the first line
data = pd.read_csv(fbuf, header=0)
return firstline, data


def _recolumn(tmy3_dataframe):
"""
Rename the columns of the TMY3 DataFrame.
Expand Down Expand Up @@ -328,7 +326,7 @@ def read_tmy2(filename):
data : DataFrame
A dataframe with the columns described in the table below. For a
more detailed descriptions of each component, please consult the
TMY2 User's Manual ([1]_), especially tables 3-1 through 3-6, and
TMY2 User's Manual [1]_, especially tables 3-1 through 3-6, and
Appendix B.

metadata : dict
Expand Down Expand Up @@ -430,6 +428,7 @@ def read_tmy2(filename):
----------
.. [1] Marion, W and Urban, K. "User's Manual
for TMY2s". NREL 1995.
:doi:`10.2172/87130`
""" # noqa: E501
# paste in the column info as one long line
string = '%2d%2d%2d%2d%4d%4d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%2d%1s%1d%2d%1s%1d%4d%1s%1d%4d%1s%1d%3d%1s%1d%4d%1s%1d%3d%1s%1d%3d%1s%1d%4d%1s%1d%5d%1s%1d%10d%3d%1s%1d%3d%1s%1d%3d%1s%1d%2d%1s%1d' # noqa: E501
Expand Down
3 changes: 2 additions & 1 deletion pvlib/tests/iotools/test_tmy.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ def test_solaranywhere_tmy3(solaranywhere_index):
# The SolarAnywhere TMY3 format specifies midnight as 00:00 whereas the
# NREL TMY3 format utilizes 24:00. The SolarAnywhere file is therefore
# included to test files with 00:00 timestamps are parsed correctly
data, meta = tmy.read_tmy3(TMY3_SOLARANYWHERE, map_variables=False)
data, meta = tmy.read_tmy3(TMY3_SOLARANYWHERE, encoding='iso-8859-1',
map_variables=False)
pd.testing.assert_index_equal(data.index, solaranywhere_index)
assert meta['USAF'] == 0
assert meta['Name'] == 'Burlington United States'
Expand Down
0