Add encoding parameter to read_tmy3 by AdamRJensen · Pull Request #1737 · pvlib/pvlib-python · GitHub
[go: up one dir, main page]

Skip to content

Add encoding parameter to read_tmy3 #1737

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/sphinx/source/whatsnew/v0.9.6.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ Deprecations

Enhancements
~~~~~~~~~~~~
* Add optional encoding parameter to :py:func:`pvlib.iotools.read_tmy3`.
(:issue:`1732`, :pull:`1737`)


Bug fixes
Expand Down Expand Up @@ -43,4 +45,4 @@ Contributors
* Siddharth Kaul (:ghuser:`k10blogger`)
* Kshitiz Gupta (:ghuser:`kshitiz305`)
* Stefan de Lange (:ghuser:`langestefan`)

* Andy Lam (:ghuser:`andylam598`)
40 changes: 20 additions & 20 deletions pvlib/iotools/tmy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pandas as pd


def read_tmy3(filename, coerce_year=None, recolumn=True):
def read_tmy3(filename, coerce_year=None, recolumn=True, encoding=None):
"""Read a TMY3 file into a pandas dataframe.

Note that values contained in the metadata dictionary are unchanged
Expand All @@ -27,6 +27,11 @@ def read_tmy3(filename, coerce_year=None, recolumn=True):
recolumn : bool, default True
If ``True``, apply standard names to TMY3 columns. Typically this
results in stripping the units from the column name.
encoding : str, optional
Encoding of the file. For files that contain non-UTF8 characters it may
be necessary to specify an alternative encoding, e.g., for
SolarAnywhere TMY3 files the encoding should be 'iso-8859-1'. Users
may also consider using the 'utf-8-sig' encoding.

Returns
-------
Expand Down Expand Up @@ -152,21 +157,21 @@ def read_tmy3(filename, coerce_year=None, recolumn=True):
----------
.. [1] Wilcox, S and Marion, W. "Users Manual for TMY3 Data Sets".
NREL/TP-581-43156, Revised May 2008.
:doi:`10.2172/928611`
.. [2] Wilcox, S. (2007). National Solar Radiation Database 1991-2005
Update: Users Manual. 472 pp.; NREL Report No. TP-581-41364.
:doi:`10.2172/901864`
.. [3] `SolarAnywhere file formats
<https://www.solaranywhere.com/support/historical-data/file-formats/>`_
""" # noqa: E501
head = ['USAF', 'Name', 'State', 'TZ', 'latitude', 'longitude', 'altitude']

try:
with open(str(filename), 'r') as fbuf:
firstline, data = _parse_tmy3(fbuf)
# SolarAnywhere files contain non-UTF8 characters and may require
# encoding='iso-8859-1' in order to be parsed
except UnicodeDecodeError:
with open(str(filename), 'r', encoding='iso-8859-1') as fbuf:
firstline, data = _parse_tmy3(fbuf)
with open(str(filename), 'r', encoding=encoding) as fbuf:
# header information on the 1st line (0 indexing)
firstline = fbuf.readline()
# use pandas to read the csv file buffer
# header is actually the second line of the file, but tell pandas to look
# for it on line 0 because readline() above already consumed the first line
data = pd.read_csv(fbuf, header=0)

meta = dict(zip(head, firstline.rstrip('\n').split(",")))
# convert metadata strings to numeric types
Expand All @@ -178,8 +183,10 @@ def read_tmy3(filename, coerce_year=None, recolumn=True):

# get the date column as a pd.Series of numpy datetime64
data_ymd = pd.to_datetime(data['Date (MM/DD/YYYY)'], format='%m/%d/%Y')
# extract minutes
minutes = data['Time (HH:MM)'].str.split(':').str[1].astype(int)
# shift the time column so that midnight is 00:00 instead of 24:00
shifted_hour = data['Time (HH:MM)'].str[:2].astype(int) % 24
shifted_hour = data['Time (HH:MM)'].str.split(':').str[0].astype(int) % 24
# shift the dates at midnight (24:00) so they correspond to the next day.
# If midnight is specified as 00:00 do not shift date.
data_ymd[data['Time (HH:MM)'].str[:2] == '24'] += datetime.timedelta(days=1) # noqa: E501
Expand All @@ -197,7 +204,8 @@ def read_tmy3(filename, coerce_year=None, recolumn=True):
data_ymd.iloc[-1] = data_ymd.iloc[-1].replace(year=coerce_year+1)
# NOTE: as of pvlib-0.6.3, min req is pandas-0.18.1, so pd.to_timedelta
# unit must be in (D,h,m,s,ms,us,ns), but pandas>=0.24 allows unit='hour'
data.index = data_ymd + pd.to_timedelta(shifted_hour, unit='h')
data.index = data_ymd + pd.to_timedelta(shifted_hour, unit='h') \
+ pd.to_timedelta(minutes, unit='min')

if recolumn:
data = _recolumn(data) # rename to standard column names
Expand All @@ -207,15 +215,6 @@ def read_tmy3(filename, coerce_year=None, recolumn=True):
return data, meta


def _parse_tmy3(fbuf):
# header information on the 1st line (0 indexing)
firstline = fbuf.readline()
# use pandas to read the csv file buffer
# header is actually the second line of the file, but tell pandas to look
# for it on line 0 because readline() above already consumed the first line
data = pd.read_csv(fbuf, header=0)
return firstline, data


def _recolumn(tmy3_dataframe):
"""
Rename the columns of the TMY3 DataFrame.
Expand Down Expand Up @@ -385,6 +384,7 @@ def read_tmy2(filename):
----------
.. [1] Marion, W and Urban, K. "User's Manual
for TMY2s". NREL 1995.
:doi:`10.2172/87130`
""" # noqa: E501
# paste in the column info as one long line
string = '%2d%2d%2d%2d%4d%4d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%4d%1s%1d%2d%1s%1d%2d%1s%1d%4d%1s%1d%4d%1s%1d%3d%1s%1d%4d%1s%1d%3d%1s%1d%3d%1s%1d%4d%1s%1d%5d%1s%1d%10d%3d%1s%1d%3d%1s%1d%3d%1s%1d%2d%1s%1d' # noqa: E501
Expand Down
2 changes: 1 addition & 1 deletion pvlib/tests/iotools/test_tmy.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def test_solaranywhere_tmy3(solaranywhere_index):
# The SolarAnywhere TMY3 format specifies midnight as 00:00 whereas the
# NREL TMY3 format utilizes 24:00. The SolarAnywhere file is therefore
# included to test files with 00:00 timestamps are parsed correctly
data, meta = tmy.read_tmy3(TMY3_SOLARANYWHERE)
data, meta = tmy.read_tmy3(TMY3_SOLARANYWHERE, encoding='iso-8859-1')
pd.testing.assert_index_equal(data.index, solaranywhere_index)
assert meta['USAF'] == 0
assert meta['Name'] == 'Burlington United States'
Expand Down
0