10000 fix: correct serialization empty columns into LineProtocol [DataFrame… · calvinlua/influxdb-client-python@500835f · GitHub
[go: up one dir, main page]

Skip to content

Commit 500835f

Browse files
authored
fix: correct serialization empty columns into LineProtocol [DataFrame] (influxdata#359)
1 parent 81e7d21 commit 500835f

File tree

3 files changed

+33
-3
lines changed

3 files changed

+33
-3
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
- `OrganizationsApi` - add possibility to: `update`
77
- `UsersApi` - add possibility to: `update`, `delete`, `find`
88

9+
### Bug Fixes
10+
1. [#359](https://github.com/influxdata/influxdb-client-python/pull/359): Correct serialization empty columns into LineProtocol [DataFrame]
11+
912
## 1.23.0 [2021-10-26]
1013

1114
### Deprecates

influxdb_client/client/write/dataframe_serializer.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ def _any_not_nan(p, indexes):
2727
return any(map(lambda x: _not_nan(p[x]), indexes))
2828

2929

30+
_EMPTY_EXPRESSION = "_EMPTY_LINE_PROTOCOL_PART_"
31+
32+
3033
class DataframeSerializer:
3134
"""Serialize DataFrame into LineProtocols."""
3235

@@ -177,13 +180,15 @@ def __init__(self, data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION
177180
field_value = f'{sep}{key_format}={{{val_format}}}'
178181
elif issubclass(value.type, np.floating):
179182
if null_columns[index]:
180-
field_value = f"""{{"" if math.isnan({val_format}) else f"{sep}{key_format}={{{val_format}}}"}}"""
183+
field_value = f"""{{
184+
"{sep}{_EMPTY_EXPRESSION}" if math.isnan({val_format}) else f"{sep}{key_format}={{{val_format}}}"
185+
}}"""
181186
else:
182187
field_value = f'{sep}{key_format}={{{val_format}}}'
183188
else:
184189
if null_columns[index]:
185190
field_value = f"""{{
186-
'' if type({val_format}) == float and math.isnan({val_format}) else
191+
'{sep}{_EMPTY_EXPRESSION}' if type({val_format}) == float and math.isnan({val_format}) else
187192
f'{sep}{key_format}="{{str({val_format}).translate(_ESCAPE_STRING)}}"'
188193
}}"""
189194
else:
@@ -244,7 +249,7 @@ def serialize(self, chunk_idx: int = None):
244249
if self.first_field_maybe_null:
245250
# When the first field is null (None/NaN), we'll have
246251
# a spurious leading comma which needs to be removed.
247-
lp = (re.sub('^((\\ |[^ ])* ),', '\\1', self.f(p))
252+
lp = (re.sub(f",{_EMPTY_EXPRESSION}|{_EMPTY_EXPRESSION},|{_EMPTY_EXPRESSION}", '', self.f(p))
248253
for p in filter(lambda x: _any_not_nan(x, self.field_indexes), _itertuples(chunk)))
249254
return list(lp)
250255
else:

tests/test_WriteApiDataFrame.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import time
22
import unittest
33
from datetime import timedelta
4+
from io import StringIO
45

56
from influxdb_client import InfluxDBClient, WriteOptions, WriteApi, WritePrecision
67
from influxdb_client.client.write.dataframe_serializer import data_frame_to_list_of_points, DataframeSerializer
@@ -374,6 +375,27 @@ def test_index_not_periodIndex_respect_write_precision(self):
374375
self.assertEqual(1, len(points))
375376
self.assertEqual(f"h2o level=15i {precision[1]}", points[0])
376377

378+
def test_serialize_strings_with_commas(self):
379+
from influxdb_client.extras import pd
380+
381+
csv = StringIO("""sep=;
382+
Date;Entry Type;Value;Currencs;Category;Person;Account;Counter Account;Group;Note;Recurring;
383+
"01.10.2018";"Expense";"-1,00";"EUR";"Testcategory";"";"Testaccount";"";"";"This, works";"no";
384+
"02.10.2018";"Expense";"-1,00";"EUR";"Testcategory";"";"Testaccount";"";"";"This , works not";"no";
385+
""")
386+
data_frame = pd.read_csv(csv, sep=";", skiprows=1, decimal=",", encoding="utf-8")
387+
data_frame['Date'] = pd.to_datetime(data_frame['Date'], format="%d.%m.%Y")
388+
data_frame.set_index('Date', inplace=True)
389+
390+
points = data_frame_to_list_of_points(data_frame=data_frame,
391+
data_frame_measurement_name="bookings",
392+
data_frame_tag_columns=['Entry Type', 'Category', 'Person', 'Account'],
393+
point_settings=PointSettings())
394+
395+
self.assertEqual(2, len(points))
396+
self.assertEqual("bookings,Account=Testaccount,Category=Testcategory,Entry\\ Type=Expense Currencs=\"EUR\",Note=\"This, works\",Recurring=\"no\",Value=-1.0 1538352000000000000", points[0])
397+
self.assertEqual("bookings,Account=Testaccount,Category=Testcategory,Entry\\ Type=Expense Currencs=\"EUR\",Note=\"This , works not\",Recurring=\"no\",Value=-1.0 1538438400000000000", points[1])
398+
377399

378400
class DataSerializerChunksTest(unittest.TestCase):
379401
def test_chunks(self):

0 commit comments

Comments
 (0)
0