blob: f7e04cdb2fde186ae80a4e7e18d0489131ba9d61 [file] [log] [blame]
Guido van Rossum00efe7e2002-07-19 17:04:46 +00001"""Strptime-related classes and functions.
2
3CLASSES:
4 LocaleTime -- Discovers and/or stores locale-specific time information
Tim Peters469cdad2002-08-08 20:19:19 +00005 TimeRE -- Creates regexes for pattern matching string of text containing
Guido van Rossum00efe7e2002-07-19 17:04:46 +00006 time information as is returned by time.strftime()
7
8FUNCTIONS:
9 firstjulian -- Calculates the Julian date up to the first of the specified
10 year
11 gregorian -- Calculates the Gregorian date based on the Julian day and
12 year
Tim Peters469cdad2002-08-08 20:19:19 +000013 julianday -- Calculates the Julian day since the first of the year based
Guido van Rossum00efe7e2002-07-19 17:04:46 +000014 on the Gregorian date
15 dayofweek -- Calculates the day of the week from the Gregorian date.
16 strptime -- Calculates the time struct represented by the passed-in string
17
18Requires Python 2.2.1 or higher.
19Can be used in Python 2.2 if the following line is added:
20 >>> True = 1; False = 0
21
22"""
23import time
24import locale
25import calendar
26from re import compile as re_compile
27from re import IGNORECASE
28from string import whitespace as whitespace_string
29
# Version of this module, stored as a tuple of three integers.
__version__ = (2,1,5)
# Author name and contact address recorded for this module.
__author__ = "Brett Cannon"
__email__ = "drifty@bigfoot.com"

# Only strptime is exported by a star-import of this module.
__all__ = ['strptime']
35
class LocaleTime(object):
    """Stores and handles locale-specific information related to time.

    Any value not passed to the constructor is discovered lazily, the first
    time the corresponding attribute is read, by querying the time, calendar
    and locale modules.

    ATTRIBUTES (all read-only after instance creation! Instance variables that
    store the values have mangled names):
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; dummy value in [0], which
                    is added by code)
        a_month -- abbreviated month names (13-item list, dummy value in
                    [0], which is added by code)
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- daylight- and non-daylight-savings timezone representation
                    (3-item list; code tacks on blank item at end for
                    possible lack of timezone such as UTC)
        lang -- Language used by instance (string)

    """

    def __init__(self, f_weekday=None, a_weekday=None, f_month=None,
                 a_month=None, am_pm=None, LC_date_time=None, LC_time=None,
                 LC_date=None, timezone=None, lang=None):
        """Optionally set attributes with passed-in values.

        Raises TypeError when a supplied sequence has the wrong length
        (7 weekday names, 12 month names, 2 AM/PM strings, 2 timezone names).

        """
        if f_weekday is None:
            self.__f_weekday = None
        elif len(f_weekday) == 7:
            self.__f_weekday = list(f_weekday)
        else:
            raise TypeError("full weekday names must be a 7-item sequence")

        if a_weekday is None:
            self.__a_weekday = None
        elif len(a_weekday) == 7:
            self.__a_weekday = list(a_weekday)
        else:
            raise TypeError(
                "abbreviated weekday names must be a 7-item sequence")

        # Month lists get a dummy '' in front so that index == month number.
        if f_month is None:
            self.__f_month = None
        elif len(f_month) == 12:
            self.__f_month = self.__pad(f_month, True)
        else:
            raise TypeError("full month names must be a 12-item sequence")

        if a_month is None:
            self.__a_month = None
        elif len(a_month) == 12:
            self.__a_month = self.__pad(a_month, True)
        else:
            raise TypeError(
                "abbreviated month names must be a 12-item sequence")

        if am_pm is None:
            self.__am_pm = None
        elif len(am_pm) == 2:
            # Copy into a list so every sequence attribute has the same type.
            self.__am_pm = list(am_pm)
        else:
            raise TypeError("AM/PM representation must be a 2-item sequence")

        self.__LC_date_time = LC_date_time
        self.__LC_time = LC_time
        self.__LC_date = LC_date

        self.__timezone = timezone
        if timezone:
            if len(timezone) != 2:
                raise TypeError("timezone names must contain 2 items")
            # Trailing '' stands for "no timezone given".
            self.__timezone = self.__pad(timezone, False)

        self.__lang = lang

    def __pad(self, seq, front):
        """Add '' to seq to either front (is True), else the back."""
        seq = list(seq)
        if front:
            seq.insert(0, '')
        else:
            seq.append('')
        return seq

    def __set_nothing(self, stuff):
        """Raise TypeError when trying to set an attribute."""
        raise TypeError("attribute does not support assignment")

    def __get_f_weekday(self):
        """Fetch self.f_weekday."""
        if not self.__f_weekday:
            self.__calc_weekday()
        return self.__f_weekday

    def __get_a_weekday(self):
        """Fetch self.a_weekday."""
        if not self.__a_weekday:
            self.__calc_weekday()
        return self.__a_weekday

    f_weekday = property(__get_f_weekday, __set_nothing,
                         doc="Full weekday names")
    a_weekday = property(__get_a_weekday, __set_nothing,
                         doc="Abbreviated weekday names")

    def __get_f_month(self):
        """Fetch self.f_month."""
        if not self.__f_month:
            self.__calc_month()
        return self.__f_month

    def __get_a_month(self):
        """Fetch self.a_month."""
        if not self.__a_month:
            self.__calc_month()
        return self.__a_month

    f_month = property(__get_f_month, __set_nothing,
                       doc="Full month names (dummy value at index 0)")
    a_month = property(__get_a_month, __set_nothing,
                       doc="Abbreviated month names (dummy value at index 0)")

    def __get_am_pm(self):
        """Fetch self.am_pm."""
        if not self.__am_pm:
            self.__calc_am_pm()
        return self.__am_pm

    am_pm = property(__get_am_pm, __set_nothing, doc="AM/PM representation")

    def __get_timezone(self):
        """Fetch self.timezone."""
        if not self.__timezone:
            self.__calc_timezone()
        return self.__timezone

    timezone = property(__get_timezone, __set_nothing,
                        doc="Timezone representation (dummy value at index 2)")

    def __get_LC_date_time(self):
        """Fetch self.LC_date_time."""
        if not self.__LC_date_time:
            self.__calc_date_time()
        return self.__LC_date_time

    def __get_LC_date(self):
        """Fetch self.LC_date."""
        if not self.__LC_date:
            self.__calc_date_time()
        return self.__LC_date

    def __get_LC_time(self):
        """Fetch self.LC_time."""
        if not self.__LC_time:
            self.__calc_date_time()
        return self.__LC_time

    LC_date_time = property(__get_LC_date_time, __set_nothing,
        doc="Format string for locale's date/time representation ('%c' format)")
    LC_date = property(__get_LC_date, __set_nothing,
        doc="Format string for locale's date representation ('%x' format)")
    LC_time = property(__get_LC_time, __set_nothing,
        doc="Format string for locale's time representation ('%X' format)")

    def __get_lang(self):
        """Fetch self.lang."""
        if not self.__lang:
            self.__calc_lang()
        return self.__lang

    lang = property(__get_lang, __set_nothing,
                    doc="Language used for instance")

    def __calc_weekday(self):
        """Set self.__a_weekday and self.__f_weekday using the calendar module."""
        a_weekday = [calendar.day_abbr[i] for i in range(7)]
        f_weekday = [calendar.day_name[i] for i in range(7)]
        # Only fill in what the caller did not supply.
        if not self.__a_weekday:
            self.__a_weekday = a_weekday
        if not self.__f_weekday:
            self.__f_weekday = f_weekday

    def __calc_month(self):
        """Set self.__f_month and self.__a_month using the calendar module."""
        # Index 0 of calendar.month_abbr/month_name is read too, giving the
        # 13-item layout used by the month attributes.
        a_month = [calendar.month_abbr[i] for i in range(13)]
        f_month = [calendar.month_name[i] for i in range(13)]
        if not self.__a_month:
            self.__a_month = a_month
        if not self.__f_month:
            self.__f_month = f_month

    def __calc_am_pm(self):
        """Set self.__am_pm by using time.strftime().

        The magic date (1999, 3, 17, hour, 44, 55, 2, 76, 0) is not really
        that magical; it is simply the same static date used everywhere else
        in this class.  Hour 1 yields the AM string, hour 22 the PM string.

        """
        am_pm = []
        for hour in (1, 22):
            time_tuple = time.struct_time((1999, 3, 17, hour, 44, 55, 2, 76, 0))
            am_pm.append(time.strftime("%p", time_tuple))
        self.__am_pm = am_pm

    def __calc_date_time(self):
        """Set self.__LC_date_time, self.__LC_date, & self.__LC_time by using
        time.strftime().

        Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
        overloaded numbers is minimized.  The output of '%c', '%x' and '%X'
        for that date is turned back into a format string by replacing each
        recognizable value with its directive.  The order of the replacements
        is very important; it eliminates possible ambiguity for what
        something represents.

        """
        time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0))
        # Sunday 1999-01-03: '%U' formats it as week 01 and '%W' as week 00,
        # which tells the two week-number directives apart.
        week_probe = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0))
        replacement_pairs = [
            (self.f_weekday[2], '%A'),
            (self.f_month[3], '%B'),
            (self.a_weekday[2], '%a'),
            (self.a_month[3], '%b'),
            (self.am_pm[1], '%p'),
            (self.timezone[0], '%Z'),
            (self.timezone[1], '%Z'),
            ('1999', '%Y'),
            ('99', '%y'),
            ('22', '%H'),
            ('44', '%M'),
            ('55', '%S'),
            ('76', '%j'),
            ('17', '%d'),
            ('03', '%m'),
            ('2', '%w'),
            ('10', '%I'),
        ]
        date_time = []
        for directive in ('%c', '%x', '%X'):
            current_format = time.strftime(directive, time_tuple)
            # Literal percent signs must survive as '%%' in the format.
            current_format = current_format.replace('%', '%%')
            for old, new in replacement_pairs:
                # Locales may report '' for AM/PM or a timezone name;
                # str.replace('', x) would insert x between every character.
                if old:
                    current_format = current_format.replace(old, new)
            # Compare against -1 explicitly: find() returns 0 (false) for a
            # hit at the start and -1 (true) for a miss.
            if time.strftime(directive, week_probe).find('00') != -1:
                U_W = '%W'
            else:
                U_W = '%U'
            # 1999-03-17 falls in week 11 for both week-number directives.
            date_time.append(current_format.replace('11', U_W))
        if not self.__LC_date_time:
            self.__LC_date_time = date_time[0]
        if not self.__LC_date:
            self.__LC_date = date_time[1]
        if not self.__LC_time:
            self.__LC_time = date_time[2]

    def __calc_timezone(self):
        """Set self.__timezone by using time.tzname.

        Empty string used for matching when timezone is not used/needed such
        as with UTC.

        """
        self.__timezone = self.__pad(time.tzname, 0)

    def __calc_lang(self):
        """Set self.lang by using locale.getlocale() or
        locale.getdefaultlocale().

        The LC_TIME locale is preferred; the default locale is used only when
        no LC_TIME language is reported.

        """
        current_lang = locale.getlocale(locale.LC_TIME)[0]
        if current_lang:
            self.__lang = current_lang
        else:
            self.__lang = locale.getdefaultlocale()[0]
273
274class TimeRE(dict):
275 """Handle conversion from format directives to regexes."""
276
277 def __init__(self, locale_time=LocaleTime()):
278 """Initialize instance with non-locale regexes and store LocaleTime object."""
279 super(TimeRE,self).__init__({
Tim Peters469cdad2002-08-08 20:19:19 +0000280 'd': r"(?P<d>3[0-1]|[0-2]\d|\d| \d)", #The " \d" option is
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000281 #to make %c from ANSI
282 #C work
283 'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
284 'I': r"(?P<I>0\d|1[0-2]|\d)",
285 'j': r"(?P<j>(?:3[0-5]\d|6[0-6])|[0-2]\d\d|\d)",
286 'm': r"(?P<m>0\d|1[0-2]|\d)",
287 'M': r"(?P<M>[0-5]\d|\d)",
288 'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
289 'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
290 'w': r"(?P<w>[0-6])",
291 'W': r"(?P<W>5[0-3]|[0-4]\d|\d)", #Same as U
292 'y': r"(?P<y>\d\d)",
293 'Y': r"(?P<Y>\d\d\d\d)"})
294 self.locale_time = locale_time
295
296 def __getitem__(self, fetch):
297 """Try to fetch regex; if it does not exist, construct it."""
298 try:
299 return super(TimeRE,self).__getitem__(fetch)
300 except KeyError:
301 if fetch == 'A':
Tim Peters469cdad2002-08-08 20:19:19 +0000302 self[fetch] = self.__seqToRE(self.locale_time.f_weekday,
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000303 fetch)
304 elif fetch == 'a':
Tim Peters469cdad2002-08-08 20:19:19 +0000305 self[fetch] = self.__seqToRE(self.locale_time.a_weekday,
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000306 fetch)
307 elif fetch == 'B':
Tim Peters469cdad2002-08-08 20:19:19 +0000308 self[fetch] = self.__seqToRE(self.locale_time.f_month[1:],
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000309 fetch)
310 elif fetch == 'b':
Tim Peters469cdad2002-08-08 20:19:19 +0000311 self[fetch] = self.__seqToRE(self.locale_time.a_month[1:],
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000312 fetch)
313 elif fetch == 'c':
314 self[fetch] = self.pattern(self.locale_time.LC_date_time)
315 elif fetch == 'p':
316 self[fetch] = self.__seqToRE(self.locale_time.am_pm, fetch)
317 elif fetch == 'x':
318 self[fetch] = self.pattern(self.locale_time.LC_date)
319 elif fetch == 'X':
320 self[fetch] = self.pattern(self.locale_time.LC_time)
321 elif fetch == 'Z':
Tim Peters469cdad2002-08-08 20:19:19 +0000322 self[fetch] = self.__seqToRE(self.locale_time.timezone,
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000323 fetch)
324 elif fetch == '%':
325 return '%'
326 return super(TimeRE,self).__getitem__(fetch)
Tim Peters469cdad2002-08-08 20:19:19 +0000327
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000328 def __seqToRE(self, to_convert, directive):
329 """Convert a list to a regex string for matching directive."""
330 def sorter(a, b):
331 """Sort based on length.
Tim Peters469cdad2002-08-08 20:19:19 +0000332
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000333 Done in case for some strange reason that names in the locale only
334 differ by a suffix and thus want the name with the suffix to match
335 first.
Tim Peters469cdad2002-08-08 20:19:19 +0000336
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000337 """
338 try: a_length = len(a)
339 except TypeError: a_length = 0
340 try: b_length = len(b)
341 except TypeError: b_length = 0
342 return cmp(b_length, a_length)
Tim Peters469cdad2002-08-08 20:19:19 +0000343
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000344 to_convert = to_convert[:] #Don't want to change value in-place.
345 to_convert.sort(sorter)
346 regex = '(?P<%s>' % directive
347 for item in to_convert:
348 regex = "%s(?:%s)|" % (regex, item)
349 else:
350 regex = regex[:-1]
351 return '%s)' % regex
352
353 def pattern(self, format):
354 """Return re pattern for the format string."""
355 processed_format = ''
356 for whitespace in whitespace_string:
357 format = format.replace(whitespace, r'\s*')
358 while format.find('%') != -1:
359 directive_index = format.index('%')+1
Tim Peters469cdad2002-08-08 20:19:19 +0000360 processed_format = "%s%s%s" % (processed_format,
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000361 format[:directive_index-1],
362 self[format[directive_index]])
363 format = format[directive_index+1:]
364 return "%s%s" % (processed_format, format)
365
366 def compile(self, format):
367 """Return a compiled re object for the format string."""
368 format = "(?#%s)%s" % (self.locale_time.lang,format)
369 return re_compile(self.pattern(format), IGNORECASE)
370
371
def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Convert data_string to a time struct based on the format.

    The object passed in for format may either be a re object compiled by
    strptime() or a format string.  If False is passed in for data_string
    then an re object for format will be returned.  The re object
    must be used with the same language as used to compile the re object.

    Fields that the format does not provide are left at -1, except that the
    day of the year, the month/day and the weekday are derived from each
    other when enough information is available.

    Raises TypeError when a re object was created for another language and
    ValueError when data_string does not match the format.

    """
    locale_time = LocaleTime()
    if isinstance(format, type(re_compile(''))):
        # "%s" keeps this consistent with how the language is embedded in the
        # pattern and avoids passing a non-string (e.g. None) to find().
        if format.pattern.find("%s" % locale_time.lang) == -1:
            raise TypeError("re object not created with same language as "
                            "LocaleTime instance")
        compiled_re = format
    else:
        compiled_re = TimeRE(locale_time).compile(format)
    if data_string is False:
        return compiled_re

    found = compiled_re.match(data_string)
    if not found:
        raise ValueError("time data did not match format")

    def index_ignoring_case(names, text):
        """Return the position of text in names, comparing without case.

        The regex matches names case-insensitively, so the lookup has to as
        well.
        """
        lowered = text.lower()
        position = 0
        for name in names:
            if name.lower() == lowered:
                return position
            position += 1
        raise ValueError("%r is not a recognized name" % (text,))

    year = month = day = hour = minute = second = weekday = julian = tz = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        if group_key == 'y':
            # Two-digit year: assume the current century.
            year = int("%s%s" % (time.strftime("%Y")[:-2], found_dict['y']))
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = index_ignoring_case(locale_time.f_month, found_dict['B'])
        elif group_key == 'b':
            month = index_ignoring_case(locale_time.a_month, found_dict['b'])
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            if 'p' in found_dict:
                meridian = found_dict['p'].lower()
                is_pm = (meridian != '' and
                         meridian == locale_time.am_pm[1].lower())
                if is_pm:
                    # 12 PM is noon and must stay 12, not become 24.
                    if hour != 12:
                        hour += 12
                elif hour == 12:
                    # 12 AM is midnight.
                    hour = 0
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'A':
            weekday = index_ignoring_case(locale_time.f_weekday,
                                          found_dict['A'])
        elif group_key == 'a':
            weekday = index_ignoring_case(locale_time.a_weekday,
                                          found_dict['a'])
        elif group_key == 'w':
            # %w counts from Sunday == 0; the time struct from Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key == 'Z':
            zone = found_dict['Z'].lower()
            if locale_time.timezone[0].lower() == zone:
                tz = 0
            elif locale_time.timezone[1].lower() == zone:
                tz = 1
            elif locale_time.timezone[2].lower() == zone:
                tz = 0
    if julian == -1 and year != -1 and month != -1 and day != -1:
        julian = julianday(year, month, day)
    if (month == -1 or day == -1) and julian != -1 and year != -1:
        year, month, day = gregorian(julian, year)
    if weekday == -1 and year != -1 and month != -1 and day != -1:
        weekday = dayofweek(year, month, day)
    return time.struct_time((year, month, day, hour, minute, second, weekday,
                             julian, tz))
456
def firstjulian(year):
    """Calculate the Julian date up until the first of the year.

    Returns the Julian day number of January 1 of the given Gregorian year.
    The leap days are counted exactly (one every 4 years, except centuries
    not divisible by 400); multiplying by the average year length and
    rounding down is off by one day for many years.

    """
    y = year + 4799
    return 365*y + y//4 - y//100 + y//400 - 31738


def julianday(year, month, day):
    """Calculate the Julian day since the beginning of the year from the Gregorian date.

    Returns the day of the year, with January 1 being day 1.

    """
    # Shift the year to start in March so the leap day comes last.
    a = (14-month)//12
    y = year + 4800 - a
    m = month + (12*a) - 3
    day_number = (day + ((153*m)+2)//5 + 365*y + y//4 - y//100 + y//400
                  - 32045)
    return day_number - firstjulian(year) + 1
466
def gregorian(julian, year):
    """Return a 3-item list containing the Gregorian date based on the Julian day.

    julian -- day of the year, with January 1 being day 1
    year -- Gregorian year the day of the year belongs to

    Returns [year, month, day].

    """
    # Julian day number of January 1 of the year, with leap days counted
    # exactly so that day 1 always maps to January 1.
    y = year + 4799
    first_of_year = 365*y + y//4 - y//100 + y//400 - 31738
    a = 32043 + julian + first_of_year
    b = ((4*a)+3)//146097          # completed 400-year cycles, times 4
    c = a - ((146097*b)//4)        # days into the current century
    d = ((4*c)+3)//1461            # completed years within the century
    e = c - ((1461*d)//4)          # days into the March-based year
    m = ((5*e)+2)//153             # month index, March == 0
    day = 1 + e - (((153*m)+2)//5)
    month = m + 3 - (12*(m//10))
    year = (100*b) + d - 4800 + (m//10)
    return [year, month, day]
479
def dayofweek(year, month, day):
    """Calculate the day of the week (Monday is 0).

    Takes a Gregorian date and returns 0 for Monday through 6 for Sunday.

    """
    # Treat January and February as the last months of the previous year.
    a = (14-month)//12
    y = year - a
    # Count leap days exactly; scaling the year by 97/400 and rounding down
    # is off by one for many years.
    leap_days = y//4 - y//100 + y//400
    weekday = (day + y + leap_days + ((31*(month+(12*a)-2))//12)) % 7
    # The formula yields Sunday == 0; shift so that Monday == 0.
    if weekday == 0:
        return 6
    return weekday - 1