Introduce Datetime type for ranges outside datetime.[MIN|MAX]YEAR · scylladb/python-driver@06b1389 · GitHub
[go: up one dir, main page]

Skip to content

Commit 06b1389

Browse files
Introduce Datetime type for ranges outside datetime.[MIN|MAX]YEAR
In Python, the year of a `datetime.datetime` has to be in the range [MINYEAR, MAXYEAR]. This range is not the same as the range of timestamps that Scylla can store. Previously, a timestamp outside this range made the driver raise an exception, which was incorrect behavior; a workaround was implemented in cqlsh. This commit introduces a `Datetime` type to accommodate ranges outside datetime.[MIN|MAX]YEAR. For Datetimes that cannot be represented as a datetime.datetime (because they fall outside datetime.MINYEAR..datetime.MAXYEAR), this type falls back to printing the milliseconds_from_epoch offset. Fixes: #255
1 parent f356716 commit 06b1389

File tree

14 files changed

+223
-63
lines changed

14 files changed

+223
-63
lines changed

cassandra/cqlengine/columns.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from cassandra.cqltypes import SimpleDateType, _cqltypes, UserType
2323
from cassandra.cqlengine import ValidationError
2424
from cassandra.cqlengine.functions import get_total_seconds
25-
from cassandra.util import Duration as _Duration
25+
from cassandra.util import Datetime, Duration as _Duration
2626

2727
log = logging.getLogger(__name__)
2828

@@ -542,17 +542,19 @@ class DateTime(Column):
542542
def to_python(self, value):
543543
if value is None:
544544
return
545+
elif isinstance(value, Datetime):
546+
return value
545547
if isinstance(value, datetime):
546548
if DateTime.truncate_microseconds:
547549
us = value.microsecond
548550
truncated_us = us // 1000 * 1000
549-
return value - timedelta(microseconds=us - truncated_us)
551+
return Datetime(value - timedelta(microseconds=us - truncated_us))
550552
else:
551-
return value
553+
return Datetime(value)
552554
elif isinstance(value, date):
553-
return datetime(*(value.timetuple()[:6]))
555+
return Datetime(datetime(*(value.timetuple()[:6])))
554556

555-
return datetime.utcfromtimestamp(value)
557+
return Datetime(datetime.utcfromtimestamp(value))
556558

557559
def to_database(self, value):
558560
value = super(DateTime, self).to_database(value)
@@ -561,6 +563,11 @@ def to_database(self, value):
561563
if not isinstance(value, datetime):
562564
if isinstance(value, date):
563565
value = datetime(value.year, value.month, value.day)
566+
elif isinstance(value, Datetime):
567+
try:
568+
value = value.datetime()
569+
except ValueError:
570+
return int(value.milliseconds_from_epoch)
564571
else:
565572
raise ValidationError("{0} '{1}' is not a datetime object".format(self.column_name, value))
566573
epoch = datetime(1970, 1, 1, tzinfo=value.tzinfo)

cassandra/cqltypes.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from binascii import unhexlify
3333
import calendar
3434
from collections import namedtuple
35+
import datetime
3536
from decimal import Decimal
3637
import io
3738
from itertools import chain
@@ -642,17 +643,16 @@ def interpret_datestring(val):
642643
@staticmethod
643644
def deserialize(byts, protocol_version):
644645
timestamp = int64_unpack(byts) / 1000.0
645-
return util.datetime_from_timestamp(timestamp)
646+
return util.Datetime(util.Datetime(util.DATETIME_EPOC) + datetime.timedelta(seconds=timestamp))
646647

647648
@staticmethod
648649
def serialize(v, protocol_version):
649650
try:
650-
# v is datetime
651-
timestamp_seconds = calendar.timegm(v.utctimetuple())
652-
timestamp = timestamp_seconds * 1e3 + getattr(v, 'microsecond', 0) / 1e3
651+
# v is Datetime
652+
timestamp = v.milliseconds_from_epoch
653653
except AttributeError:
654654
try:
655-
timestamp = calendar.timegm(v.timetuple()) * 1e3
655+
timestamp = util.Datetime(v).milliseconds_from_epoch
656656
except AttributeError:
657657
# Ints and floats are valid timestamps too
658658
if type(v) not in _number_types:

cassandra/deserializers.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ from libc.stdint cimport int32_t, uint16_t
1717

1818
include 'cython_marshal.pyx'
1919
from cassandra.buffer cimport Buffer, to_bytes, slice_buffer
20-
from cassandra.cython_utils cimport datetime_from_timestamp
20+
from cassandra.cython_utils cimport datetime_from_timestamp, DATETIME_EPOC
2121

2222
from cython.view cimport array as cython_array
2323
from cassandra.tuple cimport tuple_new, tuple_set
@@ -140,7 +140,7 @@ cdef class DesCounterColumnType(DesLongType):
140140
cdef class DesDateType(Deserializer):
141141
cdef deserialize(self, Buffer *buf, int protocol_version):
142142
cdef double timestamp = unpack_num[int64_t](buf) / 1000.0
143-
return datetime_from_timestamp(timestamp)
143+
return util.Datetime(util.Datetime(DATETIME_EPOC) + datetime.timedelta(seconds=timestamp))
144144

145145

146146
cdef class TimestampType(DesDateType):

cassandra/encoder.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from uuid import UUID
3030
import six
3131

32-
from cassandra.util import (OrderedDict, OrderedMap, OrderedMapSerializedKey,
32+
from cassandra.util import (Datetime, OrderedDict, OrderedMap, OrderedMapSerializedKey,
3333
sortedset, Time, Date, Point, LineString, Polygon)
3434

3535
if six.PY3:
@@ -80,6 +80,7 @@ def __init__(self):
8080
datetime.date: self.cql_encode_date,
8181
datetime.time: self.cql_encode_time,
8282
Date: self.cql_encode_date_ext,
83+
Datetime: self.cql_encode_datetime_ext,
8384
Time: self.cql_encode_time,
8485
dict: self.cql_encode_map_collection,
8586
OrderedDict: self.cql_encode_map_collection,
@@ -171,6 +172,13 @@ def cql_encode_datetime(self, val):
171172
timestamp = calendar.timegm(val.utctimetuple())
172173
return str(long(timestamp * 1e3 + getattr(val, 'microsecond', 0) / 1e3))
173174

175+
def cql_encode_datetime_ext(self, val):
    """
    Encodes a :class:`cassandra.util.Datetime` object as an integer
    (its epoch-relative millisecond offset), returned as a string.
    """
    # Use the integer form in case the Datetime exceeds datetime.[MIN|MAX]YEAR.
    # The offset may be a float (Datetime accepts floats); cast it so the
    # literal is rendered as "1500" rather than "1500.0", matching the
    # integer literal produced by cql_encode_datetime.
    return str(int(val.milliseconds_from_epoch))
181+
174182
def cql_encode_date(self, val):
175183
"""
176184
Converts a :class:`datetime.date` object to a string with format

cassandra/util.py

Lines changed: 125 additions & 0 deletions
< 2851 td data-grid-cell-id="diff-ea5e4e5a9bfe602bd634d796481f3615539ffebd4502bdf3513b39a033857652-1152-1171-2" data-line-anchor="diff-ea5e4e5a9bfe602bd634d796481f3615539ffebd4502bdf3513b39a033857652R1171" data-selected="false" role="gridcell" style="background-color:var(--diffBlob-additionLine-bgColor, var(--diffBlob-addition-bgColor-line));padding-right:24px" tabindex="-1" valign="top" class="focusable-grid-cell diff-text-cell right-side-diff-cell pt-4 left-side">+
10000
Original file line numberDiff line numberDiff line change
@@ -1150,6 +1150,131 @@ def __str__(self):
11501150
except:
11511151
# If we overflow datetime.[MIN|MAX]
11521152
return str(self.days_from_epoch)
1153+
1154+
1155+
@total_ordering
class Datetime(object):
    '''
    Idealized datetime: year, month, day, hour, minute, second, microsecond, and tzinfo

    Offers a wider year range than datetime.datetime. For Datetimes that cannot be represented
    as a datetime.datetime (because they fall outside datetime.MINYEAR..datetime.MAXYEAR), this
    type falls back to printing the milliseconds_from_epoch offset.
    '''

    # NOTE(review): MICRO/MILLI/SECOND look like nanosecond-based multipliers and are
    # not used by this class; kept so existing attribute access keeps working.
    MICRO = 1000
    MILLI = 1000 * MICRO
    SECOND = 1000 * MILLI
    # Seconds per minute / hour / day.
    MINUTE = 60
    HOUR = 60 * MINUTE
    DAY = 24 * HOUR

    # Numeric types accepted as a raw millisecond offset. ``long`` only exists on
    # Python 2 (or where the module aliases it), so do not depend on it being defined.
    try:
        _NUMBER_TYPES = (int, long, float)
    except NameError:
        _NUMBER_TYPES = (int, float)

    # Offset from 1970-01-01 in milliseconds; may be negative.
    milliseconds_from_epoch = 0
    # tzinfo carried over from the datetime.datetime this value was built from, if any.
    tzinfo = None

    def __init__(self, value):
        """
        Initializer value can be:

            - number_type: milliseconds from epoch (1970, 1, 1). Can be negative.
            - datetime.datetime: built-in datetime
            - datetime.date: built-in date (midnight of that day)
            - Datetime: another instance (offset and tzinfo are copied)

        TypeError is raised for any other type.
        """
        if isinstance(value, self._NUMBER_TYPES):
            self.milliseconds_from_epoch = value
        elif isinstance(value, datetime.datetime):
            # Must be tested before datetime.date: datetime.datetime is a date subclass.
            self._from_datetime(value)
        elif isinstance(value, datetime.date):
            self._from_timetuple(value.timetuple())
        elif isinstance(value, Datetime):
            self.milliseconds_from_epoch = value.milliseconds_from_epoch
            self.tzinfo = value.tzinfo
        else:
            raise TypeError('Datetime arguments must be a number, datetime.datetime, '
                            'datetime.date or Datetime')

    @property
    def seconds(self):
        """
        Absolute seconds from epoch (can be negative)
        """
        return self.milliseconds_from_epoch // 1000

    @property
    def days(self):
        """
        Absolute days from epoch (can be negative)
        """
        return self.seconds // Datetime.DAY

    def datetime(self):
        """
        Return a built-in datetime.datetime for Datetimes falling in the years [datetime.MINYEAR, datetime.MAXYEAR]

        ValueError is raised for Datetimes outside this range.
        """
        try:
            return (datetime.datetime(1970, 1, 1, tzinfo=self.tzinfo)
                    + datetime.timedelta(milliseconds=self.milliseconds_from_epoch))
        except (OverflowError, ValueError):
            # timedelta construction or the addition overflowed the built-in range.
            raise ValueError("%r exceeds ranges for built-in datetime.datetime" % self)

    def utctimetuple(self):
        """
        Same as datetime.datetime.utctimetuple(); raises ValueError when out of range.
        """
        return self.datetime().utctimetuple()

    def timetuple(self):
        """
        Same as datetime.datetime.timetuple(); raises ValueError when out of range.
        """
        return self.datetime().timetuple()

    def isoformat(self, sep='T', timespec='auto'):
        """
        Same as datetime.datetime.isoformat(); raises ValueError when out of range.
        """
        return self.datetime().isoformat(sep, timespec)

    def _from_timetuple(self, t):
        # Whole-second precision: a time tuple carries no sub-second field.
        self.milliseconds_from_epoch = calendar.timegm(t) * 1000

    def _from_datetime(self, v):
        # The wall-clock fields are read with timetuple(), so no UTC shift is applied
        # to timezone-aware values; tzinfo is remembered so datetime() rebuilds the
        # same wall-clock time. Sub-millisecond precision is dropped.
        # NOTE(review): confirm that not shifting aware datetimes to UTC is intended.
        self.milliseconds_from_epoch = calendar.timegm(v.timetuple()) * 1000 + v.microsecond // 1000
        self.tzinfo = v.tzinfo

    def __hash__(self):
        # hash() rather than the raw value: __hash__ must return an int, and the
        # offset may be a float. Equal numeric offsets still hash equally.
        # NOTE(review): a Datetime that compares equal to a datetime.datetime does
        # not share its hash.
        return hash(self.milliseconds_from_epoch)

    def __eq__(self, other):
        if isinstance(other, Datetime):
            return self.milliseconds_from_epoch == other.milliseconds_from_epoch

        if isinstance(other, self._NUMBER_TYPES):
            return self.milliseconds_from_epoch == other

        try:
            return self.datetime() == other
        except (OverflowError, TypeError, ValueError):
            # Out of the built-in range, or not comparable: treat as unequal.
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        if not isinstance(other, Datetime):
            return NotImplemented
        return self.milliseconds_from_epoch < other.milliseconds_from_epoch

    def __add__(self, other):
        """
        Datetime + datetime.timedelta -> Datetime (tzinfo is preserved).
        """
        if isinstance(other, datetime.timedelta):
            result = Datetime(int(self.milliseconds_from_epoch + other.total_seconds() * 1000))
            result.tzinfo = self.tzinfo
            return result
        # Let Python try the reflected operation / raise TypeError instead of
        # recursing into this method forever.
        return NotImplemented

    __radd__ = __add__

    def __sub__(self, other):
        """
        Datetime - Datetime -> datetime.timedelta; Datetime - datetime.timedelta -> Datetime.
        """
        if isinstance(other, Datetime):
            return datetime.timedelta(milliseconds=self.milliseconds_from_epoch - other.milliseconds_from_epoch)
        if isinstance(other, datetime.timedelta):
            return self + (-other)
        return NotImplemented

    def __repr__(self):
        return "Datetime(%s)" % self.milliseconds_from_epoch

    def __str__(self):
        try:
            dt = self.datetime()
        except ValueError:
            # If we overflow datetime.[MIN|MAX], fall back to the raw offset.
            return str(self.milliseconds_from_epoch)
        # Microseconds are six digits wide.
        return "%04d-%02d-%02d %02d:%02d:%02d.%06d" % (
            dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.microsecond)
11531278

11541279
import socket
11551280
if hasattr(socket, 'inet_pton'):

docs/dates-and-times.rst

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,46 +11,38 @@ Timestamps in Cassandra are timezone-naive timestamps encoded as millseconds sin
1111
timestamps in this database usually find it easiest to reason about them if they are always assumed to be UTC. To quote the
1212
pytz documentation, "The preferred way of dealing with times is to always work in UTC, converting to localtime only when
1313
generating output to be read by humans." The driver adheres to this tenet, and assumes UTC is always in the database. The
14-
driver attempts to make this correct on the way in, and assumes no timezone on the way out.
14+
driver attempts to make this correct on the way in, and assumes no timezone on the way out. Timestamps in Cassandra are
15+
idealized markers, much like ``datetime.datetime`` in the Python standard library. Unlike this Python implementation, the
16+
Cassandra encoding supports much wider ranges. To accommodate these ranges without overflow, this driver returns these data
17+
in a custom type: :class:`.util.Datetime`.
1518

1619
Write Path
1720
~~~~~~~~~~
1821
When inserting timestamps, the driver handles serialization for the write path as follows:
1922

20-
If the input is a ``datetime.datetime``, the serialization is normalized by starting with the ``utctimetuple()`` of the
21-
value.
22-
23-
- If the ``datetime`` object is timezone-aware, the timestamp is shifted, and represents the UTC timestamp equivalent.
24-
- If the ``datetime`` object is timezone-naive, this results in no shift -- any ``datetime`` with no timezone information is assumed to be UTC
25-
26-
Note the second point above applies even to "local" times created using ``now()``::
27-
28-
>>> d = datetime.now()
29-
30-
>>> print(d.tzinfo)
31-
None
32-
33-
34-
These do not contain timezone information intrinsically, so they will be assumed to be UTC and not shifted. When generating
35-
timestamps in the application, it is clearer to use ``datetime.utcnow()`` to be explicit about it.
23+
The driver accepts anything that can be used to construct the :class:`.util.Datetime` class.
24+
See the linked API docs for details. It uses :attr:`.util.Datetime.milliseconds_from_epoch` as the epoch-relative millisecond timestamp.
3625

3726
If the input for a timestamp is numeric, it is assumed to be an epoch-relative millisecond timestamp, as specified in the
3827
CQL spec -- no scaling or conversion is done.
3928

4029
Read Path
4130
~~~~~~~~~
31+
The driver always returns the custom type :class:`.util.Datetime` for ``timestamp`` columns.
32+
4233
The driver always assumes persisted timestamps are UTC and makes no attempt to localize them. Returned values are
43-
timezone-naive ``datetime.datetime``. We follow this approach because the datetime API has deficiencies around daylight
34+
timezone-naive :class:`.util.Datetime`. We follow this approach because the datetime API has deficiencies around daylight
4435
saving time, and the de facto package for handling this is a third-party package (we try to minimize external dependencies
4536
and not make decisions for the integrator).
4637

4738
The decision for how to handle timezones is left to the application. For the most part it is straightforward to apply
48-
localization to the ``datetime``\s returned by queries. One prevalent method is to use pytz for localization::
39+
localization to the :class:`.util.Datetime` values returned by queries, once they are converted to ``datetime.datetime`` by
40+
:meth:`.util.Datetime.datetime`. One prevalent method is to use pytz for localization::
4941

5042
import pytz
5143
user_tz = pytz.timezone('US/Central')
5244
timestamp_naive = row.ts
53-
timestamp_utc = pytz.utc.localize(timestamp_naive)
45+
timestamp_utc = pytz.utc.localize(timestamp_naive.datetime())
5446
timestamp_presented = timestamp_utc.astimezone(user_tz)
5547

5648
This is the most robust approach (likely refactored into a function). If it is deemed too cumbersome to apply for all call

tests/integration/cqlengine/columns/test_container_columns.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from cassandra.cqlengine.functions import get_total_seconds
2727
from cassandra.cqlengine.models import Model, ValidationError
2828
from cassandra.cqlengine.management import sync_table, drop_table
29+
from cassandra.util import Datetime
2930

3031
from tests.integration import CASSANDRA_IP
3132
from tests.integration.cqlengine import is_prepend_reversed
@@ -390,7 +391,7 @@ def test_io_success(self):
390391
""" Tests that a basic usage works as expected """
391392
k1 = uuid4()
392393
k2 = uuid4()
393-
now = datetime.now()
394+
now = Datetime(datetime.now())
394395
then = now + timedelta(days=1)
395396
m1 = TestMapModel.create(int_map={1: k1, 2: k2},
396397
text_map={'now': now, 'then': then})

tests/integration/cqlengine/columns/test_validation.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def test_datetime_timestamp(self):
9797
dt_value = 1454520554
9898
self.DatetimeTest.objects.create(test_id=5, created_at=dt_value)
9999
dt2 = self.DatetimeTest.objects(test_id=5).first()
100-
self.assertEqual(dt2.created_at, datetime.utcfromtimestamp(dt_value))
100+
self.assertEqual(dt2.created_at, util.Datetime(datetime.utcfromtimestamp(dt_value)))
101101

102102
def test_datetime_large(self):
103103
dt_value = datetime(2038, 12, 31, 10, 10, 10, 123000)
@@ -318,12 +318,12 @@ class TestDateTime(DataType, BaseCassEngTestCase):
318318
def setUpClass(cls):
319319
cls.db_klass, cls.python_klass = (
320320
DateTime,
321-
datetime
321+
util.Datetime
322322
)
323323
cls.first_value, cls.second_value, cls.third_value = (
324-
datetime(2017, 4, 13, 18, 34, 24, 317000),
325-
datetime(1, 1, 1),
326-
datetime(1, 1, 2)
324+
util.Datetime(datetime(2017, 4, 13, 18, 34, 24, 317000)),
325+
util.Datetime(datetime(1, 1, 1)),
326+
util.Datetime(datetime(1, 1, 2))
327327
)
328328
super(TestDateTime, cls).setUpClass()
329329

tests/integration/cqlengine/query/test_datetime_queries.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from datetime import datetime, timedelta
1616
from uuid import uuid4
1717
from cassandra.cqlengine.functions import get_total_seconds
18+
from cassandra.util import Datetime
1819

1920
from tests.integration.cqlengine.base import BaseCassEngTestCase
2021

@@ -65,7 +66,7 @@ def test_range_query(self):
6566
@execute_count(3)
6667
def test_datetime_precision(self):
6768
""" Tests that millisecond resolution is preserved when saving datetime objects """
68-
now = datetime.now()
69+
now = Datetime(datetime.now())
6970
pk = 1000
7071
obj = DateTimeQueryTestModel.create(user=pk, day=now, data='energy cheese')
7172
load = DateTimeQueryTestModel.get(user=pk)

tests/integration/datatype_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from uuid import uuid1, uuid4
1818
import six
1919

20-
from cassandra.util import OrderedMap, Date, Time, sortedset, Duration
20+
from cassandra.util import Datetime, OrderedMap, Date, Time, sortedset, Duration
2121

2222
from tests.integration import get_server_versions
2323

@@ -104,7 +104,7 @@ def get_sample_data():
104104
sample_data[datatype] = 'text'
105105

106106
elif datatype == 'timestamp':
107-
sample_data[datatype] = datetime(2013, 12, 31, 23, 59, 59, 999000)
107+
sample_data[datatype] = Datetime(datetime(2013, 12, 31, 23, 59, 59, 999000))
108108

109109
elif datatype == 'timeuuid':
110110
sample_data[datatype] = uuid1()

0 commit comments

Comments
 (0)
0