add raw data api index formatting and helper function for querying mi… · lboeman/pvlib-python@9c1fee2 · GitHub
[go: up one dir, main page]

Skip to content

Commit 9c1fee2

Browse files
committed
add raw data api index formatting and helper function for querying midc servers for data
1 parent c3517fb commit 9c1fee2

File tree

5 files changed

+110
-8
lines changed

5 files changed

+110
-8
lines changed

docs/sphinx/source/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,7 @@ relevant to solar energy modeling.
323323
iotools.read_srml_month_from_solardat
324324
iotools.read_surfrad
325325
iotools.read_midc
326+
iotools.read_midc_raw_data_from_nrel
326327

327328
A :py:class:`~pvlib.location.Location` object may be created from metadata
328329
in some files.

docs/sphinx/source/whatsnew/v0.6.1.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ Enhancements
3636
:py:func:`pvlib.iotools.read_srml_month_from_solardat` to read University of
3737
Oregon Solar Radiation Monitoring Laboratory data. (:issue:`589`)
3838
* Created :py:func:`pvlib.iotools.read_surfrad` to read NOAA SURFRAD data. (:issue:`590`)
39-
* Created :py:func:`pvlib.iotools.read_midc` to read NREL MIDC data. (:issue:`601`)
39+
* Created :py:func:`pvlib.iotools.read_midc` and :py:func:`pvlib.iotools.read_midc_raw_data_from_nrel`
40+
to read NREL MIDC data. (:issue:`601`)
4041

4142
Bug fixes
4243
~~~~~~~~~

pvlib/iotools/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44
from pvlib.iotools.srml import read_srml_month_from_solardat # noqa: F401
55
from pvlib.iotools.surfrad import read_surfrad # noqa: F401
66
from pvlib.iotools.midc import read_midc # noqa: F401
7+
from pvlib.iotools.midc import read_midc_raw_data_from_nrel # noqa: F401

pvlib/iotools/midc.py

Lines changed: 87 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,9 @@ def map_midc_to_pvlib(variable_map, field_name):
2626
Parameters
2727
----------
2828
variable_map: Dictionary
29-
A dictionary for mapping MIDC field nameto pvlib name. See VARIABLE_MAP
30-
for default value and description of how to construct this argument.
29+
A dictionary for mapping MIDC field name to pvlib name. See
30+
VARIABLE_MAP for default value and description of how to construct
31+
this argument.
3132
field_name: string
3233
The Column to map.
3334
@@ -36,13 +37,21 @@ def map_midc_to_pvlib(variable_map, field_name):
3637
label: string
3738
The pvlib variable name associated with the MIDC field or the input if
3839
a mapping does not exist.
40+
41+
Notes
42+
-----
43+
Will fail if field_name to be mapped matches an entry in VARIABLE_MAP and
44+
does not contain brackets. This should not be an issue unless MIDC file
45+
headers are updated.
46+
3947
"""
4048
new_field_name = field_name
4149
for midc_name, pvlib_name in variable_map.items():
4250
if field_name.startswith(midc_name):
4351
# extract the instrument and units field and then remove units
4452
instrument_units = field_name[len(midc_name):]
45-
instrument = instrument_units[:instrument_units.find('[') - 1]
53+
units_index = instrument_units.find('[')
54+
instrument = instrument_units[:units_index - 1]
4655
new_field_name = pvlib_name + instrument.replace(' ', '_')
4756
break
4857
return new_field_name
@@ -51,6 +60,17 @@ def map_midc_to_pvlib(variable_map, field_name):
5160
def format_index(data):
5261
"""Create DatetimeIndex for the Dataframe localized to the timezone provided
5362
as the label of the second (time) column.
63+
64+
Parameters
65+
----------
66+
data: Dataframe
67+
Must contain 'DATE (MM/DD/YYYY)' column, second column must be labeled
68+
with the timezone and contain times in 'HH:MM' format.
69+
70+
Returns
71+
-------
72+
data: Dataframe
73+
Dataframe with DatetimeIndex localized to the provided timezone.
5474
"""
5575
timezone = data.columns[1]
5676
datetime = data['DATE (MM/DD/YYYY)'] + data[timezone]
@@ -60,7 +80,32 @@ def format_index(data):
6080
return data
6181

6282

63-
def read_midc(filename, variable_map=VARIABLE_MAP):
83+
def format_index_raw(data):
    """Create a DatetimeIndex for a Dataframe of MIDC raw data.

    The index is assembled from the 'Year' and 'DOY' columns together with
    the time column, and is localized to the timezone given by the label of
    the time column.

    Parameters
    ----------
    data: Dataframe
        Must contain columns 'Year' and 'DOY'. The fourth column
        (``data.columns[3]``) must be labeled with the timezone and hold the
        time of day as an integer in HHMM form (e.g. 5 for 00:05, 2359 for
        23:59).

    Returns
    -------
    data: Dataframe
        The data with a DatetimeIndex localized to the provided timezone.
    """
    # The label of the time column doubles as the timezone name.
    tz = data.columns[3]
    # Cast through int so whole-number float columns format the same way as
    # integer columns, then zero-pad to the widths expected by %j and %H%M.
    year = data['Year'].astype(int).astype(str)
    jday = data['DOY'].astype(int).astype(str).str.zfill(3)
    time = data[tz].astype(int).astype(str).str.zfill(4)
    index = pd.to_datetime(year + jday + time, format="%Y%j%H%M")
    data = data.set_index(index)
    data = data.tz_localize(tz)
    return data
106+
107+
108+
def read_midc(filename, variable_map=VARIABLE_MAP, raw_data=False):
64109
"""Read in National Renewable Energy Laboratory Measurement and
65110
Instrumentation Data Center [1]_ weather data.
66111
@@ -72,6 +117,9 @@ def read_midc(filename, variable_map=VARIABLE_MAP):
72117
Dictionary for mapping MIDC field names to pvlib names. See variable
73118
`VARIABLE_MAP` for default and Notes section below for a description of
74119
its format.
120+
raw_data: boolean
121+
Set to true to use format_index_raw to correctly format the date/time
122+
columns of MIDC raw data files.
75123
76124
Returns
77125
-------
@@ -99,7 +147,41 @@ def read_midc(filename, variable_map=VARIABLE_MAP):
99147
`https://midcdmz.nrel.gov/ <https://midcdmz.nrel.gov/>`_
100148
"""
101149
data = pd.read_csv(filename)
102-
data = format_index(data)
150+
if raw_data:
151+
data = format_index_raw(data)
152+
else:
153+
data = format_index(data)
103154
mapper = partial(map_midc_to_pvlib, variable_map)
104155
data = data.rename(columns=mapper)
105156
return data
157+
158+
159+
def read_midc_raw_data_from_nrel(site, start, end):
    """Request and read MIDC data directly from the raw data api.

    Parameters
    ----------
    site: string
        The MIDC station id.
    start: datetime
        Start date for requested data.
    end: datetime
        End date for requested data.

    Returns
    -------
    data: Dataframe
        Dataframe with a localized DatetimeIndex, as returned by
        ``read_midc`` called with ``raw_data=True``.

    Notes
    -----
    Requests spanning an instrumentation change will yield an error. See the
    MIDC raw data api page here_ for more details and considerations.

    .. _here: https://midcdmz.nrel.gov/apps/data_api_doc.pl?_idtextlist
    """
    # An ordered sequence of pairs keeps the query string identical on every
    # interpreter, independent of dict ordering.
    args = (('site', site),
            ('begin', start.strftime('%Y%m%d')),
            ('end', end.strftime('%Y%m%d')))
    endpoint = 'https://midcdmz.nrel.gov/apps/data_api.pl?'
    url = endpoint + '&'.join('{}={}'.format(k, v) for k, v in args)
    return read_midc(url, raw_data=True)

pvlib/test/test_midc.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import os
33

44
import pandas as pd
5+
from pandas.util.testing import network
56
import pytest
67

78
from pvlib.iotools import midc
@@ -10,7 +11,7 @@
1011
test_dir = os.path.dirname(
1112
os.path.abspath(inspect.getfile(inspect.currentframe())))
1213
midc_testfile = os.path.join(test_dir, '../data/midc_20181014.txt')
13-
14+
midc_network_testfile = 'https://midcdmz.nrel.gov/apps/data_api.pl?site=UAT&begin=20181018&end=20181019'
1415

1516
@pytest.mark.parametrize('field_name,expected', [
1617
('Temperature @ 2m [deg C]', 'temp_air_@_2m'),
@@ -22,7 +23,7 @@ def test_read_midc_mapper_function(field_name, expected):
2223
assert midc.map_midc_to_pvlib(midc.VARIABLE_MAP, field_name) == expected
2324

2425

25-
def test_read_midc_format_index():
26+
def test_midc_format_index():
2627
data = pd.read_csv(midc_testfile)
2728
data = midc.format_index(data)
2829
start = pd.Timestamp("20181014 00:00")
@@ -33,9 +34,25 @@ def test_read_midc_format_index():
3334
assert data.index[0] == start
3435
assert data.index[-1] == end
3536

37+
@network
def test_midc_format_index_raw():
    # Reads from the live MIDC raw data api, hence the network marker.
    data = pd.read_csv(midc_network_testfile)
    data = midc.format_index_raw(data)
    start = pd.Timestamp('20181018 00:00')
    start = start.tz_localize('MST')
    end = pd.Timestamp('20181019 23:59')
    end = end.tz_localize('MST')
    # The requested range should be covered from first to last minute.
    assert data.index[0] == start
    assert data.index[-1] == end
3646

3747
def test_read_midc_var_mapping_as_arg():
    # Passing the default map explicitly must still rename the MIDC fields.
    frame = midc.read_midc(midc_testfile, variable_map=midc.VARIABLE_MAP)
    for mapped_name in ('ghi_PSP', 'temp_air_@_2m', 'temp_air_@_50m'):
        assert mapped_name in frame.columns
52+
53+
54+
start_ts = pd.Timestamp('20181018')
end_ts = pd.Timestamp('20181019')


@network
def test_read_midc_raw_data_from_nrel():
    data = midc.read_midc_raw_data_from_nrel('UAT', start_ts, end_ts)
    # The request must yield rows on a timezone-aware DatetimeIndex.
    assert not data.empty
    assert isinstance(data.index, pd.DatetimeIndex)
    assert data.index.tz is not None

0 commit comments

Comments
 (0)
0