blob: cde6e5f2347be7416d971c5dd443857d9ff27603 [file] [log] [blame]
Guido van Rossum00efe7e2002-07-19 17:04:46 +00001"""Strptime-related classes and functions.
2
3CLASSES:
4 LocaleTime -- Discovers and/or stores locale-specific time information
5 TimeRE -- Creates regexes for pattern matching string of text containing
6 time information as is returned by time.strftime()
7
8FUNCTIONS:
9 firstjulian -- Calculates the Julian date up to the first of the specified
10 year
11 gregorian -- Calculates the Gregorian date based on the Julian day and
12 year
13 julianday -- Calculates the Julian day since the first of the year based
14 on the Gregorian date
15 dayofweek -- Calculates the day of the week from the Gregorian date.
16 strptime -- Calculates the time struct represented by the passed-in string
17
18Requires Python 2.2.1 or higher.
19Can be used in Python 2.2 if the following line is added:
20 >>> True = 1; False = 0
21
22"""
23import time
24import locale
25import calendar
26from re import compile as re_compile
27from re import IGNORECASE
28from string import whitespace as whitespace_string
29
# Version of this module, kept as a 3-item tuple.
__version__ = (2,1,5)
# Author of the module and a contact address.
__author__ = "Brett Cannon"
__email__ = "drifty@bigfoot.com"

# Only strptime() is exported by a star-import of this module.
__all__ = ['strptime']
35
class LocaleTime(object):
    """Stores and handles locale-specific information related to time.

    Every value may be handed to the constructor; anything left as None is
    worked out lazily, on first access, from the calendar, time and locale
    modules.

    ATTRIBUTES (all read-only after instance creation! Instance variables that
    store the values have mangled names):
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; dummy value in [0], which
                    is added by code)
        a_month -- abbreviated month names (13-item list, dummy value in
                    [0], which is added by code)
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- daylight- and non-daylight-savings timezone representation
                    (3-item list; code tacks on blank item at end for
                    possible lack of timezone such as UTC)
        lang -- Language used by instance (string)

    """

    def __init__(self, f_weekday=None, a_weekday=None, f_month=None,
                 a_month=None, am_pm=None, LC_date_time=None, LC_time=None,
                 LC_date=None, timezone=None, lang=None):
        """Optionally set attributes with passed-in values.

        Sequences are copied into lists.  TypeError is raised when a supplied
        sequence does not have the expected number of items (7 weekdays,
        12 months, 2 AM/PM strings, 2 timezone names).

        """
        if f_weekday is None: self.__f_weekday = None
        elif len(f_weekday) == 7: self.__f_weekday = list(f_weekday)
        else:
            raise TypeError("full weekday names must be a 7-item sequence")
        if a_weekday is None: self.__a_weekday = None
        elif len(a_weekday) == 7: self.__a_weekday = list(a_weekday)
        else:
            raise TypeError(
                "abbreviated weekday names must be a 7-item sequence")
        if f_month is None: self.__f_month = None
        elif len(f_month) == 12:
            self.__f_month = self.__pad(f_month, True)
        else:
            raise TypeError("full month names must be a 12-item sequence")
        if a_month is None: self.__a_month = None
        elif len(a_month) == 12:
            self.__a_month = self.__pad(a_month, True)
        else:
            raise TypeError(
                "abbreviated month names must be a 12-item sequence")
        if am_pm is None:
            self.__am_pm = None
        elif len(am_pm) == 2:
            # Copy, like every other sequence, so later changes to the
            # caller's object cannot leak into this instance.
            self.__am_pm = list(am_pm)
        else:
            raise TypeError("AM/PM representation must be a 2-item sequence")
        self.__LC_date_time = LC_date_time
        self.__LC_time = LC_time
        self.__LC_date = LC_date
        if timezone:
            if len(timezone) != 2:
                raise TypeError("timezone names must contain 2 items")
            self.__timezone = self.__pad(timezone, False)
        else:
            self.__timezone = timezone
        self.__lang = lang

    def __pad(self, seq, front):
        """Add '' to seq to either front (is True), else the back."""
        seq = list(seq)
        if front: seq.insert(0, '')
        else: seq.append('')
        return seq

    def __set_nothing(self, stuff):
        """Raise TypeError when trying to set an attribute."""
        raise TypeError("attribute does not support assignment")

    def __get_f_weekday(self):
        """Fetch self.f_weekday."""
        if not self.__f_weekday: self.__calc_weekday()
        return self.__f_weekday

    def __get_a_weekday(self):
        """Fetch self.a_weekday."""
        if not self.__a_weekday: self.__calc_weekday()
        return self.__a_weekday

    f_weekday = property(__get_f_weekday, __set_nothing,
                         doc="Full weekday names")
    a_weekday = property(__get_a_weekday, __set_nothing,
                         doc="Abbreviated weekday names")

    def __get_f_month(self):
        """Fetch self.f_month."""
        if not self.__f_month: self.__calc_month()
        return self.__f_month

    def __get_a_month(self):
        """Fetch self.a_month."""
        if not self.__a_month: self.__calc_month()
        return self.__a_month

    f_month = property(__get_f_month, __set_nothing,
                       doc="Full month names (dummy value at index 0)")
    a_month = property(__get_a_month, __set_nothing,
                       doc="Abbreviated month names (dummy value at index 0)")

    def __get_am_pm(self):
        """Fetch self.am_pm."""
        if not self.__am_pm: self.__calc_am_pm()
        return self.__am_pm

    am_pm = property(__get_am_pm, __set_nothing, doc="AM/PM representation")

    def __get_timezone(self):
        """Fetch self.timezone."""
        if not self.__timezone: self.__calc_timezone()
        return self.__timezone

    timezone = property(__get_timezone, __set_nothing,
                        doc="Timezone representation (dummy value at index 2)")

    def __get_LC_date_time(self):
        """Fetch self.LC_date_time."""
        if not self.__LC_date_time: self.__calc_date_time()
        return self.__LC_date_time

    def __get_LC_date(self):
        """Fetch self.LC_date."""
        if not self.__LC_date: self.__calc_date_time()
        return self.__LC_date

    def __get_LC_time(self):
        """Fetch self.LC_time."""
        if not self.__LC_time: self.__calc_date_time()
        return self.__LC_time

    LC_date_time = property(__get_LC_date_time, __set_nothing,
        doc="Format string for locale's date/time representation ('%c' format)")
    LC_date = property(__get_LC_date, __set_nothing,
        doc="Format string for locale's date representation ('%x' format)")
    LC_time = property(__get_LC_time, __set_nothing,
        doc="Format string for locale's time representation ('%X' format)")

    def __get_lang(self):
        """Fetch self.lang."""
        if not self.__lang: self.__calc_lang()
        return self.__lang

    lang = property(__get_lang, __set_nothing, doc="Language used for instance")

    def __calc_weekday(self):
        """Set self.__a_weekday and self.__f_weekday using the calendar module."""
        a_weekday = [calendar.day_abbr[i] for i in range(7)]
        f_weekday = [calendar.day_name[i] for i in range(7)]
        # Only fill in what the caller did not supply.
        if not self.__a_weekday: self.__a_weekday = a_weekday
        if not self.__f_weekday: self.__f_weekday = f_weekday

    def __calc_month(self):
        """Set self.__f_month and self.__a_month using the calendar module."""
        # Index 0 of the calendar sequences is the dummy entry.
        a_month = [calendar.month_abbr[i] for i in range(13)]
        f_month = [calendar.month_name[i] for i in range(13)]
        if not self.__a_month: self.__a_month = a_month
        if not self.__f_month: self.__f_month = f_month

    def __calc_am_pm(self):
        """Set self.__am_pm by using time.strftime().

        The magic date (1999, 3, 17, hour, 44, 55, 2, 76, 0) is not really
        that magical; it is simply the static date used everywhere else in
        this class.  One morning hour and one evening hour are formatted
        with '%p' to obtain the two strings.

        """
        am_pm = []
        for hour in (1, 22):
            time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0))
            am_pm.append(time.strftime("%p", time_tuple))
        self.__am_pm = am_pm

    def __calc_date_time(self):
        """Set self.__LC_date_time, self.__LC_date, & self.__LC_time by using
        time.strftime().

        Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
        overloaded numbers is minimized.  The order in which searches for
        values within the format string is very important; it eliminates
        possible ambiguity for what something represents.

        """
        time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0))
        # 1999-01-03 is a Sunday: it falls in week 01 when weeks start on
        # Sunday (%U) but in week 00 when they start on Monday (%W).
        week_probe = time.struct_time((1999,1,3,1,1,1,6,3,0))
        # (text produced by strftime, directive that produced it), applied
        # strictly in this order.
        replacement_pairs = [
            (self.f_weekday[2], '%A'),
            (self.f_month[3], '%B'),
            (self.a_weekday[2], '%a'),
            (self.a_month[3], '%b'),
            (self.am_pm[1], '%p'),
            (self.timezone[0], '%Z'),
            (self.timezone[1], '%Z'),
            ('1999', '%Y'),
            ('99', '%y'),
            ('22', '%H'),
            ('44', '%M'),
            ('55', '%S'),
            ('76', '%j'),
            ('17', '%d'),
            ('03', '%m'),
            ('2', '%w'),
            ('10', '%I')]
        date_time = []
        for directive in ('%c', '%x', '%X'):
            current_format = time.strftime(directive, time_tuple)
            # Literal percent signs must survive as '%%'.
            current_format = current_format.replace('%', '%%')
            for old, new in replacement_pairs:
                # An empty name (no AM/PM text, no timezone name) must be
                # skipped: replacing '' would insert the directive between
                # every character of the format.
                if old:
                    current_format = current_format.replace(old, new)
            # find() returns -1 when absent, so compare explicitly instead
            # of relying on truthiness.
            if time.strftime(directive, week_probe).find('00') != -1:
                U_W = '%W'
            else:
                U_W = '%U'
            date_time.append(current_format.replace('11', U_W))
        if not self.__LC_date_time: self.__LC_date_time = date_time[0]
        if not self.__LC_date: self.__LC_date = date_time[1]
        if not self.__LC_time: self.__LC_time = date_time[2]

    def __calc_timezone(self):
        """Set self.__timezone by using time.tzname.

        Empty string used for matching when timezone is not used/needed such
        as with UTC.

        """
        self.__timezone = self.__pad(time.tzname, 0)

    def __calc_lang(self):
        """Set self.lang by using locale.getlocale() or
        locale.getdefaultlocale().

        """
        current_lang = locale.getlocale(locale.LC_TIME)[0]
        if current_lang: self.__lang = current_lang
        else: self.__lang = locale.getdefaultlocale()[0]
273
class TimeRE(dict):
    """Handle conversion from format directives to regexes.

    The mapping is keyed by directive letter (without the '%').  Regexes for
    locale-independent directives are stored up front; locale-dependent ones
    are built from self.locale_time the first time they are requested and
    then cached in the mapping.

    """

    def __init__(self, locale_time=None):
        """Initialize instance with non-locale regexes and store LocaleTime object.

        locale_time -- object supplying the locale-specific names and
                       formats; when omitted a fresh LocaleTime is created
                       for this instance rather than sharing one built when
                       the class was defined.

        """
        super(TimeRE,self).__init__({
            'd': r"(?P<d>3[0-1]|[0-2]\d|\d| \d)",  #The " \d" option is
                                                   #to make %c from ANSI
                                                   #C work
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>0\d|1[0-2]|\d)",
            # Longest alternatives first so 360-366 and two-digit days are
            # consumed whole.
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[0-2]\d\d|\d\d|\d)",
            'm': r"(?P<m>0\d|1[0-2]|\d)",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            'W': r"(?P<W>5[0-3]|[0-4]\d|\d)",  #Same as U
            'y': r"(?P<y>\d\d)",
            'Y': r"(?P<Y>\d\d\d\d)"})
        if locale_time is None:
            locale_time = LocaleTime()
        self.locale_time = locale_time

    def __getitem__(self, fetch):
        """Try to fetch regex; if it does not exist, construct it.

        Raises KeyError for a directive that is neither stored nor one of
        the locale-dependent directives handled here.

        """
        try:
            return super(TimeRE,self).__getitem__(fetch)
        except KeyError:
            if fetch == 'A':
                self[fetch] = self.__seqToRE(self.locale_time.f_weekday,
                                             fetch)
            elif fetch == 'a':
                self[fetch] = self.__seqToRE(self.locale_time.a_weekday,
                                             fetch)
            elif fetch == 'B':
                # Skip the dummy entry at index 0.
                self[fetch] = self.__seqToRE(self.locale_time.f_month[1:],
                                             fetch)
            elif fetch == 'b':
                self[fetch] = self.__seqToRE(self.locale_time.a_month[1:],
                                             fetch)
            elif fetch == 'c':
                self[fetch] = self.pattern(self.locale_time.LC_date_time)
            elif fetch == 'p':
                self[fetch] = self.__seqToRE(self.locale_time.am_pm, fetch)
            elif fetch == 'x':
                self[fetch] = self.pattern(self.locale_time.LC_date)
            elif fetch == 'X':
                self[fetch] = self.pattern(self.locale_time.LC_time)
            elif fetch == 'Z':
                self[fetch] = self.__seqToRE(self.locale_time.timezone,
                                             fetch)
            elif fetch == '%':
                # A doubled percent sign stands for itself; nothing cached.
                return '%'
            return super(TimeRE,self).__getitem__(fetch)

    def __seqToRE(self, to_convert, directive):
        """Convert a list to a regex string for matching directive.

        The result is a named group (named after directive) whose
        alternatives are the items of to_convert, longest first, so that a
        name which is a prefix of another cannot shadow the longer one.
        Each item is regex-escaped so it is matched literally.

        """
        from re import escape as re_escape
        # Decorate with (negated length, original position) so a plain
        # sort() orders longest-first and keeps equal lengths in input
        # order, without needing a comparison function.
        decorated = []
        position = 0
        for item in to_convert:
            try: length = len(item)
            except TypeError: length = 0
            decorated.append((-length, position, item))
            position += 1
        decorated.sort()
        alternatives = []
        for entry in decorated:
            alternatives.append("(?:%s)" % re_escape("%s" % entry[2]))
        return '(?P<%s>%s)' % (directive, '|'.join(alternatives))

    def pattern(self, format):
        """Return re pattern for the format string.

        Every whitespace character in format becomes r'\\s*' and every
        '%<letter>' directive is replaced by the regex stored (or built)
        for that letter.  Raises KeyError for an unknown directive and
        IndexError when format ends with a lone '%'.

        """
        processed_format = ''
        for whitespace in whitespace_string:
            format = format.replace(whitespace, r'\s*')
        while format.find('%') != -1:
            # Index of the directive letter that follows the '%'.
            directive_index = format.index('%')+1
            processed_format = "%s%s%s" % (processed_format,
                                           format[:directive_index-1],
                                           self[format[directive_index]])
            format = format[directive_index+1:]
        return "%s%s" % (processed_format, format)

    def compile(self, format):
        """Return a compiled re object for the format string.

        The language of self.locale_time is embedded as a regex comment at
        the front of the pattern, and the regex is compiled with IGNORECASE.

        """
        format = "(?#%s)%s" % (self.locale_time.lang,format)
        return re_compile(self.pattern(format), IGNORECASE)
370
371
def _index_ignoring_case(names, text):
    """Return the index of the first entry of names equal to text, ignoring case.

    Raises ValueError when no entry matches.

    """
    wanted = text.lower()
    position = 0
    for name in names:
        if name.lower() == wanted:
            return position
        position += 1
    raise ValueError("%r is not a recognized name" % (text,))


def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Convert data_string to a time struct based on format.

    The object passed in for format may either be a re object compiled by
    strptime() or a format string.  If False is passed in for data_string
    then an re object for format will be returned.  The re object
    must be used with the same language as used to compile the re object.

    Fields for which the format supplies no information are left at -1,
    except that the day of the year, the calendar date and the weekday are
    derived from one another when enough of them are known.

    Raises TypeError when a passed-in re object was not built for the
    current language, and ValueError when data_string does not match the
    format.

    """
    locale_time = LocaleTime()
    if isinstance(format, type(re_compile(''))):
        # The language is embedded in the pattern as text; format it the
        # same way here so a missing (None) language does not break find().
        if format.pattern.find("%s" % (locale_time.lang,)) == -1:
            raise TypeError("re object not created with same language as "
                            "LocaleTime instance")
        compiled_re = format
    else:
        compiled_re = TimeRE(locale_time).compile(format)
    if data_string is False:
        return compiled_re
    found = compiled_re.match(data_string)
    if not found:
        raise ValueError("time data did not match format")
    year = month = day = hour = minute = second = weekday = julian = tz = -1
    found_dict = found.groupdict()
    # The regex is compiled with IGNORECASE, so every lookup of matched text
    # against the locale's names has to ignore case as well.
    for group_key in found_dict.keys():
        if group_key == 'y':
            # POSIX convention for two-digit years: 69-99 are 19xx,
            # 00-68 are 20xx.
            year = int(found_dict['y'])
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = _index_ignoring_case(locale_time.f_month, found_dict['B'])
        elif group_key == 'b':
            month = _index_ignoring_case(locale_time.a_month, found_dict['b'])
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            if 'p' in found_dict:
                if found_dict['p'].lower() == locale_time.am_pm[1].lower():
                    # 12 PM is already hour 12 on the 24-hour clock.
                    if hour != 12:
                        hour += 12
                elif hour == 12:
                    # 12 AM is midnight.
                    hour = 0
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'A':
            weekday = _index_ignoring_case(locale_time.f_weekday,
                                           found_dict['A'])
        elif group_key == 'a':
            weekday = _index_ignoring_case(locale_time.a_weekday,
                                           found_dict['a'])
        elif group_key == 'w':
            # %w counts from Sunday == 0; the time struct counts from
            # Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key == 'Z':
            found_zone = found_dict['Z'].lower()
            zones = locale_time.timezone
            if zones[0].lower() == found_zone:
                tz = 0
            elif zones[1].lower() == found_zone:
                tz = 1
            elif zones[2].lower() == found_zone:
                tz = 0
    if julian == -1 and year != -1 and month != -1 and day != -1:
        julian = julianday(year, month, day)
    if (month == -1 or day == -1) and julian != -1 and year != -1:
        year,month,day = gregorian(julian, year)
    if weekday == -1 and year != -1 and month != -1 and day != -1:
        weekday = dayofweek(year, month, day)
    return time.struct_time((year,month,day,hour,minute,second,weekday,
                             julian,tz))
456
def _gregorian_year_days(years):
    """Return the day count contributed by `years` whole Gregorian years.

    This is the exact leap-year rule (every 4th year, except every 100th,
    except every 400th).  Multiplying by the average year length
    (146097/400) and flooring is NOT equivalent and drifts by a day in
    many years.

    """
    return 365*years + years//4 - years//100 + years//400

def firstjulian(year):
    """Calculate the Julian date up until the first of the year.

    The value returned is the Julian Day Number of January 1 of year.

    """
    # January belongs to the previous March-based year of the conversion
    # algorithm, hence 4799 rather than 4800; 31738 folds together the
    # day (1), the month term for January (306) and the epoch offset
    # (-32045).
    return _gregorian_year_days(year+4799)-31738

def julianday(year, month, day):
    """Calculate the Julian day since the beginning of the year from the Gregorian date.

    January 1 is day 1.

    """
    # a is 1 for January/February, which count as months 10 and 11 of the
    # previous March-based year, and 0 otherwise.
    a = (14-month)//12
    day_number = (day-32045+(((153*(month+(12*a)-3))+2)//5)+
                  _gregorian_year_days(year+4800-a))
    return day_number-firstjulian(year)+1

def gregorian(julian, year):
    """Return a 3-item list containing the Gregorian date based on the Julian day.

    julian is the day of the year (January 1 is 1); the list returned is
    [year, month, day].

    """
    # Shift the Julian Day Number of the requested day onto the March-based
    # epoch, then peel off 400-year cycles (b), years (d) and months (m).
    a = 32043+julian+firstjulian(year)
    b = ((4*a)+3)//146097
    c = a-((146097*b)//4)
    d = ((4*c)+3)//1461
    e = c-((1461*d)//4)
    m = ((5*e)+2)//153
    day = 1+e-(((153*m)+2)//5)
    # m counts from March; m//10 is 1 for January/February, which belong to
    # the following calendar year.
    month = m+3-(12*(m//10))
    year = (100*b)+d-4800+(m//10)
    return [year, month, day]
479
def dayofweek(year, month, day):
    """Calculate the day of the week (Monday is 0).

    year, month and day are a Gregorian calendar date; the result runs from
    0 (Monday) to 6 (Sunday).

    """
    # Treat January and February as months 11 and 12 of the previous year
    # so the leap day falls at the very end of the counted year.
    a = (14-month)//12
    y = year-a
    # Exact leap-year count (y//4 - y//100 + y//400); the averaged form
    # (97*y)//400 is off by one in many years.
    leap_days = (y//4)-(y//100)+(y//400)
    weekday = (day+y+leap_days+((31*(month+(12*a)-2))//12))%7
    # The formula yields Sunday == 0; shift so that Monday == 0.
    if weekday == 0:
        return 6
    else:
        return weekday-1