14
14
import time
15
15
import locale
16
16
import calendar
17
+ import re
17
18
from re import compile as re_compile
18
19
from re import sub as re_sub
19
20
from re import IGNORECASE
@@ -41,6 +42,21 @@ def _findall(haystack, needle):
41
42
yield i
42
43
i += len (needle )
43
44
45
+
46
# Alternative digits used by the lzh_TW locale.  The entry at index n is the
# numeral string that stands for the integer n (0 through 31), so that
# lzh_TW_alt_digits.index(text) converts a matched numeral back to its value.
# The strings must not contain any whitespace: they are compared against, and
# searched for in, text produced by time.strftime().
lzh_TW_alt_digits = (
    # 〇:一:二:三:四:五:六:七:八:九
    '\u3007', '\u4e00', '\u4e8c', '\u4e09', '\u56db',
    '\u4e94', '\u516d', '\u4e03', '\u516b', '\u4e5d',
    # 十:十一:十二:十三:十四:十五:十六:十七:十八:十九
    '\u5341', '\u5341\u4e00', '\u5341\u4e8c', '\u5341\u4e09', '\u5341\u56db',
    '\u5341\u4e94', '\u5341\u516d', '\u5341\u4e03', '\u5341\u516b', '\u5341\u4e5d',
    # 廿:廿一:廿二:廿三:廿四:廿五:廿六:廿七:廿八:廿九
    '\u5eff', '\u5eff\u4e00', '\u5eff\u4e8c', '\u5eff\u4e09', '\u5eff\u56db',
    '\u5eff\u4e94', '\u5eff\u516d', '\u5eff\u4e03', '\u5eff\u516b', '\u5eff\u4e5d',
    # 卅:卅一
    '\u5345', '\u5345\u4e00',
)
58
+
59
+
44
60
class LocaleTime (object ):
45
61
"""Stores and handles locale-specific information related to time.
46
62
@@ -84,6 +100,7 @@ def __init__(self):
84
100
self .__calc_weekday ()
85
101
self .__calc_month ()
86
102
self .__calc_am_pm ()
103
+ self .__calc_alt_digits ()
87
104
self .__calc_timezone ()
88
105
self .__calc_date_time ()
89
106
if _getlang () != self .lang :
@@ -119,36 +136,76 @@ def __calc_am_pm(self):
119
136
am_pm .append (time .strftime ("%p" , time_tuple ).lower ().strip ())
120
137
self .am_pm = am_pm
121
138
139
def __calc_alt_digits(self):
    """Set self.LC_alt_digits by probing time.strftime().

    After the call self.LC_alt_digits is one of:

    * ``()`` -- the locale's %x/%X output contains only ASCII digits.
    * a list of 100 strings -- the output uses ten consecutive non-ASCII
      decimal digits; entry n is the text for the number n.  Entries
      0-9 are two characters long (leading zero) unless the locale is
      observed to omit the leading zero, in which case they are the
      single digits.
    * ``lzh_TW_alt_digits`` -- the output contains the numerals that
      identify the lzh_TW locale.
    * ``None`` -- non-ASCII output that fits none of the cases above.
    """
    # The magic data should contain all decimal digits.
    time_tuple = time.struct_time((1998, 1, 27, 10, 43, 56, 1, 27, 0))
    s = time.strftime("%x%X", time_tuple)
    if s.isascii():
        # Fast path -- all digits are ASCII.
        self.LC_alt_digits = ()
        return

    digits = ''.join(sorted(set(re.findall(r'\d', s))))
    if len(digits) == 10 and ord(digits[-1]) == ord(digits[0]) + 9:
        # All 10 decimal digits from the same set.
        if digits.isascii():
            # All digits are ASCII.
            self.LC_alt_digits = ()
            return

        self.LC_alt_digits = [a + b for a in digits for b in digits]
        # Test whether the numbers contain leading zero: every field of
        # this date is 1, so the two-character form of 1 must show up
        # somewhere if the locale pads with a leading zero.
        time_tuple2 = time.struct_time((2000, 1, 1, 1, 1, 1, 5, 1, 0))
        if self.LC_alt_digits[1] not in time.strftime("%x %X", time_tuple2):
            # Slice-assigning the string stores its ten characters as the
            # first ten entries.
            self.LC_alt_digits[:10] = digits
        return

    # Either non-Gregorian calendar or non-decimal numbers.
    # issubset() iterates over the characters of s, so every element of
    # the set has to be exactly one character long.
    if {'\u4e00', '\u4e03', '\u4e5d', '\u5341', '\u5eff'}.issubset(s):
        # lzh_TW
        self.LC_alt_digits = lzh_TW_alt_digits
        return

    self.LC_alt_digits = None
172
+
122
173
def __calc_date_time (self ):
123
- # Set self.date_time , self.date, & self.time by using
124
- # time.strftime().
174
+ # Set self.LC_date_time , self.LC_date, self.LC_time and
175
+ # self.LC_time_ampm by using time.strftime().
125
176
126
177
# Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
127
178
# overloaded numbers is minimized. The order in which searches for
128
179
# values within the format string is very important; it eliminates
129
180
# possible ambiguity for what something represents.
130
181
time_tuple = time .struct_time ((1999 ,3 ,17 ,22 ,44 ,55 ,2 ,76 ,0 ))
131
182
time_tuple2 = time .struct_time ((1999 ,1 ,3 ,1 ,1 ,1 ,6 ,3 ,0 ))
132
- replacement_pairs = [
183
+ replacement_pairs = []
184
+
185
+ # Non-ASCII digits
186
+ if self .LC_alt_digits or self .LC_alt_digits is None :
187
+ for n , d in [(19 , '%OC' ), (99 , '%Oy' ), (22 , '%OH' ),
188
+ (44 , '%OM' ), (55 , '%OS' ), (17 , '%Od' ),
189
+ (3 , '%Om' ), (2 , '%Ow' ), (10 , '%OI' )]:
190
+ if self .LC_alt_digits is None :
191
+ s = chr (0x660 + n // 10 ) + chr (0x660 + n % 10 )
192
+ replacement_pairs .append ((s , d ))
193
+ if n < 10 :
194
+ replacement_pairs .append ((s [1 ], d ))
195
+ elif len (self .LC_alt_digits ) > n :
196
+ replacement_pairs .append ((self .LC_alt_digits [n ], d ))
197
+ else :
198
+ replacement_pairs .append ((time .strftime (d , time_tuple ), d ))
199
+ replacement_pairs += [
133
200
('1999' , '%Y' ), ('99' , '%y' ), ('22' , '%H' ),
134
201
('44' , '%M' ), ('55' , '%S' ), ('76' , '%j' ),
135
202
('17' , '%d' ), ('03' , '%m' ), ('3' , '%m' ),
136
203
# '3' needed for when no leading zero.
137
204
('2' , '%w' ), ('10' , '%I' ),
138
- # Non-ASCII digits
139
- ('\u0661 \u0669 \u0669 \u0669 ' , '%Y' ),
140
- ('\u0669 \u0669 ' , '%Oy' ),
141
- ('\u0662 \u0662 ' , '%OH' ),
142
- ('\u0664 \u0664 ' , '%OM' ),
143
- ('\u0665 \u0665 ' , '%OS' ),
144
- ('\u0661 \u0667 ' , '%Od' ),
145
- ('\u0660 \u0663 ' , '%Om' ),
146
- ('\u0663 ' , '%Om' ),
147
- ('\u0662 ' , '%Ow' ),
148
- ('\u0661 \u0660 ' , '%OI' ),
149
205
]
206
+
150
207
date_time = []
151
- for directive in ('%c' , '%x' , '%X' ):
208
+ for directive in ('%c' , '%x' , '%X' , '%r' ):
152
209
current_format = time .strftime (directive , time_tuple ).lower ()
153
210
current_format = current_format .replace ('%' , '%%' )
154
211
# The month and the day of the week formats are treated specially
@@ -172,9 +229,10 @@ def __calc_date_time(self):
172
229
if tz :
173
230
current_format = current_format .replace (tz , "%Z" )
174
231
# Transform all non-ASCII digits to digits in range U+0660 to U+0669.
175
- current_format = re_sub (r'\d(?<![0-9])' ,
176
- lambda m : chr (0x0660 + int (m [0 ])),
177
- current_format )
232
+ if not current_format .isascii () and self .LC_alt_digits is None :
233
+ current_format = re_sub (r'\d(?<![0-9])' ,
234
+ lambda m : chr (0x0660 + int (m [0 ])),
235
+ current_format )
178
236
for old , new in replacement_pairs :
179
237
current_format = current_format .replace (old , new )
180
238
# If %W is used, then Sunday, 2005-01-03 will fall on week 0 since
@@ -189,6 +247,7 @@ def __calc_date_time(self):
189
247
self .LC_date_time = date_time [0 ]
190
248
self .LC_date = date_time [1 ]
191
249
self .LC_time = date_time [2 ]
250
+ self .LC_time_ampm = date_time [3 ]
192
251
193
252
def __find_month_format (self , directive ):
194
253
"""Find the month format appropriate for the current locale.
@@ -213,7 +272,7 @@ def __find_month_format(self, directive):
213
272
full_indices &= indices
214
273
indices = set (_findall (datetime , self .a_month [m ]))
215
274
if abbr_indices is None :
216
- abbr_indices = indices
275
+ abbr_indices = set ( indices )
217
276
else :
218
277
abbr_indices &= indices
219
278
if not full_indices and not abbr_indices :
@@ -241,7 +300,7 @@ def __find_weekday_format(self, directive):
241
300
if self .f_weekday [wd ] != self .a_weekday [wd ]:
242
301
indices = set (_findall (datetime , self .a_weekday [wd ]))
243
302
if abbr_indices is None :
244
- abbr_indices = indices
303
+ abbr_indices = set ( indices )
245
304
else :
246
305
abbr_indices &= indices
247
306
if not full_indices and not abbr_indices :
@@ -288,8 +347,10 @@ def __init__(self, locale_time=None):
288
347
# The " [1-9]" part of the regex is to make %c from ANSI C work
289
348
'd' : r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])" ,
290
349
'f' : r"(?P<f>[0-9]{1,6})" ,
291
- 'H' : r"(?P<H>2[0-3]|[0-1]\d|\d)" ,
350
+ 'H' : r"(?P<H>2[0-3]|[0-1]\d|\d| \d)" ,
351
+ 'k' : r"(?P<H>2[0-3]|[0-1]\d|\d| \d)" ,
292
352
'I' : r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9])" ,
353
+ 'l' : r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9])" ,
293
354
'G' : r"(?P<G>\d\d\d\d)" ,
294
355
'j' : r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])" ,
295
356
'm' : r"(?P<m>1[0-2]|0[1-9]|[1-9])" ,
@@ -312,16 +373,49 @@ def __init__(self, locale_time=None):
312
373
for tz in tz_names ),
313
374
'Z' ),
314
375
'%' : '%' }
315
- for d in 'dmyHIMS' :
316
- mapping ['O' + d ] = r'(?P<%s>\d\d|\d| \d)' % d
317
- mapping ['Ow' ] = r'(?P<w>\d)'
376
+ if self .locale_time .LC_alt_digits is None :
377
+ for d in 'dmyCHIMS' :
378
+ mapping ['O' + d ] = r'(?P<%s>\d\d|\d| \d)' % d
379
+ mapping ['Ow' ] = r'(?P<w>\d)'
380
+ else :
381
+ mapping .update ({
382
+ 'Od' : self .__seqToRE (self .locale_time .LC_alt_digits [1 :32 ], 'd' ,
383
+ '3[0-1]|[1-2][0-9]|0[1-9]|[1-9]' ),
384
+ 'Om' : self .__seqToRE (self .locale_time .LC_alt_digits [1 :13 ], 'm' ,
385
+ '1[0-2]|0[1-9]|[1-9]' ),
386
+ 'Ow' : self .__seqToRE (self .locale_time .LC_alt_digits [:7 ], 'w' ,
387
+ '[0-6]' ),
388
+ 'Oy' : self .__seqToRE (self .locale_time .LC_alt_digits , 'y' ,
389
+ '[0-9][0-9]' ),
390
+ 'OC' : self .__seqToRE (self .locale_time .LC_alt_digits , 'C' ,
391
+ '[0-9][0-9]' ),
392
+ 'OH' : self .__seqToRE (self .locale_time .LC_alt_digits [:24 ], 'H' ,
393
+ '2[0-3]|[0-1][0-9]|[0-9]' ),
394
+ 'OI' : self .__seqToRE (self .locale_time .LC_alt_digits [1 :13 ], 'I' ,
395
+ '1[0-2]|0[1-9]|[1-9]' ),
396
+ 'OM' : self .__seqToRE (self .locale_time .LC_alt_digits [:60 ], 'M' ,
397
+ '[0-5][0-9]|[0-9]' ),
398
+ 'OS' : self .__seqToRE (self .locale_time .LC_alt_digits [:62 ], 'S' ,
399
+ '6[0-1]|[0-5][0-9]|[0-9]' ),
400
+ })
401
+ mapping .update ({
402
+ 'e' : mapping ['d' ],
403
+ 'Oe' : mapping ['Od' ],
404
+ 'P' : mapping ['p' ],
405
+ 'Op' : mapping ['p' ],
406
+ 'W' : mapping ['U' ].replace ('U' , 'W' ),
407
+ })
318
408
mapping ['W' ] = mapping ['U' ].replace ('U' , 'W' )
409
+
319
410
base .__init__ (mapping )
411
+ base .__setitem__ ('T' , self .pattern ('%H:%M:%S' ))
412
+ base .__setitem__ ('R' , self .pattern ('%H:%M' ))
413
+ base .__setitem__ ('r' , self .pattern (self .locale_time .LC_time_ampm ))
320
414
base .__setitem__ ('X' , self .pattern (self .locale_time .LC_time ))
321
415
base .__setitem__ ('x' , self .pattern (self .locale_time .LC_date ))
322
416
base .__setitem__ ('c' , self .pattern (self .locale_time .LC_date_time ))
323
417
324
- def __seqToRE (self , to_convert , directive ):
418
+ def __seqToRE (self , to_convert , directive , altregex = None ):
325
419
"""Convert a list to a regex string for matching a directive.
326
420
327
421
Want possible matching values to be from longest to shortest. This
@@ -337,8 +431,9 @@ def __seqToRE(self, to_convert, directive):
337
431
else :
338
432
return ''
339
433
regex = '|' .join (re_escape (stuff ) for stuff in to_convert )
340
- regex = '(?P<%s>%s' % (directive , regex )
341
- return '%s)' % regex
434
+ if altregex is not None :
435
+ regex += '|' + altregex
436
+ return '(?P<%s>%s)' % (directive , regex )
342
437
343
438
def pattern (self , format ):
344
439
"""Return regex pattern for the format string.
@@ -365,7 +460,7 @@ def repl(m):
365
460
nonlocal day_of_month_in_format
366
461
day_of_month_in_format = True
367
462
return self [format_char ]
368
- format = re_sub (r'%([OE]?\\?.?)' , repl , format )
463
+ format = re_sub (r'%[-_0^#]*[0-9]* ([OE]?\\?.?)' , repl , format )
369
464
if day_of_month_in_format and not year_in_format :
370
465
import warnings
371
466
warnings .warn ("""\
@@ -467,37 +562,50 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
467
562
# values
468
563
weekday = julian = None
469
564
found_dict = found .groupdict ()
565
+ if locale_time .LC_alt_digits :
566
+ def parse_int (s ):
567
+ try :
568
+ return locale_time .LC_alt_digits .index (s )
569
+ except ValueError :
570
+ return int (s )
571
+ else :
572
+ parse_int = int
573
+
470
574
for group_key in found_dict .keys ():
471
575
# Directives not explicitly handled below:
472
576
# c, x, X
473
577
# handled by making out of other directives
474
578
# U, W
475
579
# worthless without day of the week
476
580
if group_key == 'y' :
477
- year = int (found_dict ['y' ])
478
- # Open Group specification for strptime() states that a %y
479
- #value in the range of [00, 68] is in the century 2000, while
480
- #[69,99] is in the century 1900
481
- if year <= 68 :
482
- year += 2000
581
+ year = parse_int (found_dict ['y' ])
582
+ if 'C' in found_dict :
583
+ century = parse_int (found_dict ['C' ])
584
+ year += century * 100
483
585
else :
484
- year += 1900
586
+ # Open Group specification for strptime() states that a %y
587
+ #value in the range of [00, 68] is in the century 2000, while
588
+ #[69,99] is in the century 1900
589
+ if year <= 68 :
590
+ year += 2000
591
+ else :
592
+ year += 1900
485
593
elif group_key == 'Y' :
486
594
year = int (found_dict ['Y' ])
487
595
elif group_key == 'G' :
488
596
iso_year = int (found_dict ['G' ])
489
597
elif group_key == 'm' :
490
- month = int (found_dict ['m' ])
598
+ month = parse_int (found_dict ['m' ])
491
599
elif group_key == 'B' :
492
600
month = locale_time .f_month .index (found_dict ['B' ].lower ())
493
601
elif group_key == 'b' :
494
602
month = locale_time .a_month .index (found_dict ['b' ].lower ())
495
603
elif group_key == 'd' :
496
- day = int (found_dict ['d' ])
604
+ day = parse_int (found_dict ['d' ])
497
605
elif group_key == 'H' :
498
- hour = int (found_dict ['H' ])
606
+ hour = parse_int (found_dict ['H' ])
499
607
elif group_key == 'I' :
500
- hour = int (found_dict ['I' ])
608
+ hour = parse_int (found_dict ['I' ])
501
609
ampm = found_dict .get ('p' , '' ).lower ()
502
610
# If there was no AM/PM indicator, we'll treat this like AM
503
611
if ampm in ('' , locale_time .am_pm [0 ]):
@@ -513,9 +621,9 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
513
621
if hour != 12 :
514
622
hour += 12
515
623
elif group_key == 'M' :
516
- minute = int (found_dict ['M' ])
624
+ minute = parse_int (found_dict ['M' ])
517
625
elif group_key == 'S' :
518
- second = int (found_dict ['S' ])
626
+ second = parse_int (found_dict ['S' ])
519
627
elif group_key == 'f' :
520
628
s = found_dict ['f' ]
521
629
# Pad to always return microseconds.
0 commit comments