blob: 0777b7e801d449ff7cb2bb35bf3f21f563eb3e45 [file] [log] [blame]
"""Strptime-related classes and functions.

CLASSES:
    LocaleTime -- Discovers and/or stores locale-specific time information
    TimeRE -- Creates regexes for pattern matching a string of text containing
                time information as returned by time.strftime()

FUNCTIONS:
    _getlang -- Figure out what language is being used for the locale
    strptime -- Calculates the time struct represented by the passed-in string

Requires Python 2.2.1 or higher (mainly because of the use of property()).
Can be used in Python 2.2 if the following line is added:
    True = 1; False = 0
"""
16import time
17import locale
18import calendar
19from re import compile as re_compile
20from re import IGNORECASE
Raymond Hettinger1fdb6332003-03-09 07:44:42 +000021from datetime import date as datetime_date
Guido van Rossum00efe7e2002-07-19 17:04:46 +000022
# Module metadata: author name and contact address.
__author__ = "Brett Cannon"
__email__ = "brett@python.org"

# strptime() is the only name exported by a star-import of this module.
__all__ = ['strptime']
27
Tim Peters80cebc12003-01-19 04:40:44 +000028def _getlang():
29 # Figure out what the current language is set to.
30 current_lang = locale.getlocale(locale.LC_TIME)[0]
31 if current_lang:
32 return current_lang
33 else:
34 current_lang = locale.getdefaultlocale()[0]
35 if current_lang:
36 return current_lang
37 else:
38 return ''
Barry Warsaw35816e62002-08-29 16:24:50 +000039
class LocaleTime(object):
    """Stores and handles locale-specific information related to time.

    Any value may be handed to the constructor; a value that is left out (or
    is empty) is worked out from the running environment the first time the
    corresponding attribute is read.

    ATTRIBUTES (all read-only after instance creation! Instance variables that
    store the values have mangled names):
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; dummy value in [0], which
                    is added by code)
        a_month -- abbreviated month names (13-item list, dummy value in
                    [0], which is added by code)
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- daylight- and non-daylight-savings timezone representation
                    (3-item list; code tacks on blank item at end for
                    possible lack of timezone such as UTC)
        lang -- Language used by instance (string)
    """

    def __init__(self, f_weekday=None, a_weekday=None, f_month=None,
                 a_month=None, am_pm=None, LC_date_time=None, LC_time=None,
                 LC_date=None, timezone=None, lang=None):
        """Optionally set attributes with passed-in values.

        Sequences are copied into lists.  Month sequences must hold 12 names
        and get a dummy '' put in front; a timezone sequence must hold 2
        names and gets a '' appended.

        Raises TypeError when a supplied sequence has the wrong length.
        """
        self.__f_weekday = self.__checked(
            f_weekday, 7, "full weekday names must be a 7-item sequence")
        self.__a_weekday = self.__checked(
            a_weekday, 7,
            "abbreviated weekday names must be a 7-item sequence")
        self.__f_month = self.__checked(
            f_month, 12, "full month names must be a 12-item sequence",
            True)
        self.__a_month = self.__checked(
            a_month, 12,
            "abbreviated month names must be a 12-item sequence", True)
        # Copied to a list like every other sequence so that the stored value
        # cannot be changed from outside and always supports list methods.
        self.__am_pm = self.__checked(
            am_pm, 2, "AM/PM representation must be a 2-item sequence")
        self.__LC_date_time = LC_date_time
        self.__LC_time = LC_time
        self.__LC_date = LC_date
        if timezone:
            if len(timezone) != 2:
                raise TypeError("timezone names must contain 2 items")
            timezone = self.__pad(timezone, False)
        self.__timezone = timezone
        self.__lang = lang

    def __checked(self, seq, length, message, pad_front=False):
        # Return None for a missing value, otherwise a list copy of seq
        # (with '' put in front when pad_front is true).  Raise
        # TypeError(message) when seq does not hold exactly `length` items.
        if seq is None:
            return None
        if len(seq) != length:
            raise TypeError(message)
        if pad_front:
            return self.__pad(seq, True)
        return list(seq)

    def __pad(self, seq, front):
        # Add '' to seq to either front (is True), else the back.
        seq = list(seq)
        if front:
            seq.insert(0, '')
        else:
            seq.append('')
        return seq

    def __set_nothing(self, stuff):
        # Raise TypeError when trying to set an attribute.
        raise TypeError("attribute does not support assignment")

    def __get_f_weekday(self):
        # Fetch self.f_weekday.
        if not self.__f_weekday:
            self.__calc_weekday()
        return self.__f_weekday

    def __get_a_weekday(self):
        # Fetch self.a_weekday.
        if not self.__a_weekday:
            self.__calc_weekday()
        return self.__a_weekday

    f_weekday = property(__get_f_weekday, __set_nothing,
                         doc="Full weekday names")
    a_weekday = property(__get_a_weekday, __set_nothing,
                         doc="Abbreviated weekday names")

    def __get_f_month(self):
        # Fetch self.f_month.
        if not self.__f_month:
            self.__calc_month()
        return self.__f_month

    def __get_a_month(self):
        # Fetch self.a_month.
        if not self.__a_month:
            self.__calc_month()
        return self.__a_month

    f_month = property(__get_f_month, __set_nothing,
                       doc="Full month names (dummy value at index 0)")
    a_month = property(__get_a_month, __set_nothing,
                       doc="Abbreviated month names (dummy value at index 0)")

    def __get_am_pm(self):
        # Fetch self.am_pm.
        if not self.__am_pm:
            self.__calc_am_pm()
        return self.__am_pm

    am_pm = property(__get_am_pm, __set_nothing, doc="AM/PM representation")

    def __get_timezone(self):
        # Fetch self.timezone.
        if not self.__timezone:
            self.__calc_timezone()
        return self.__timezone

    timezone = property(__get_timezone, __set_nothing,
                        doc="Timezone representation (dummy value at index 2)")

    def __get_LC_date_time(self):
        # Fetch self.LC_date_time.
        if not self.__LC_date_time:
            self.__calc_date_time()
        return self.__LC_date_time

    def __get_LC_date(self):
        # Fetch self.LC_date.
        if not self.__LC_date:
            self.__calc_date_time()
        return self.__LC_date

    def __get_LC_time(self):
        # Fetch self.LC_time.
        if not self.__LC_time:
            self.__calc_date_time()
        return self.__LC_time

    LC_date_time = property(
        __get_LC_date_time, __set_nothing,
        doc="Format string for locale's date/time representation "
            "('%c' format)")
    LC_date = property(
        __get_LC_date, __set_nothing,
        doc="Format string for locale's date representation ('%x' format)")
    LC_time = property(
        __get_LC_time, __set_nothing,
        doc="Format string for locale's time representation ('%X' format)")

    def __get_lang(self):
        # Fetch self.lang.
        if not self.__lang:
            self.__calc_lang()
        return self.__lang

    lang = property(__get_lang, __set_nothing,
                    doc="Language used for instance")

    def __calc_weekday(self):
        # Set self.__a_weekday and self.__f_weekday using the calendar
        # module.  A value that was supplied by the caller is left alone.
        a_weekday = [calendar.day_abbr[i] for i in range(7)]
        f_weekday = [calendar.day_name[i] for i in range(7)]
        if not self.__a_weekday:
            self.__a_weekday = a_weekday
        if not self.__f_weekday:
            self.__f_weekday = f_weekday

    def __calc_month(self):
        # Set self.__f_month and self.__a_month using the calendar module.
        # Index 0 of the calendar sequences is the dummy entry.
        a_month = [calendar.month_abbr[i] for i in range(13)]
        f_month = [calendar.month_name[i] for i in range(13)]
        if not self.__a_month:
            self.__a_month = a_month
        if not self.__f_month:
            self.__f_month = f_month

    def __calc_am_pm(self):
        # Set self.__am_pm by using time.strftime().

        # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
        # magical; just happened to have used it everywhere else where a
        # static date was needed.  Hour 1 yields the AM marker and hour 22
        # the PM marker.
        am_pm = []
        for hour in (1, 22):
            time_tuple = time.struct_time((1999, 3, 17, hour, 44, 55, 2, 76, 0))
            am_pm.append(time.strftime("%p", time_tuple))
        self.__am_pm = am_pm

    def __calc_date_time(self):
        # Set self.__LC_date_time, self.__LC_date, & self.__LC_time by using
        # time.strftime().

        # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
        # overloaded numbers is minimized.  The order in which searches for
        # values within the format string is very important; it eliminates
        # possible ambiguity for what something represents.
        time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0))
        date_time = [time.strftime("%c", time_tuple),
                     time.strftime("%x", time_tuple),
                     time.strftime("%X", time_tuple)]
        replacement_pairs = (
            ('%', '%%'), (self.f_weekday[2], '%A'),
            (self.f_month[3], '%B'), (self.a_weekday[2], '%a'),
            (self.a_month[3], '%b'), (self.am_pm[1], '%p'),
            (self.timezone[0], '%Z'), (self.timezone[1], '%Z'),
            ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
            ('44', '%M'), ('55', '%S'), ('76', '%j'),
            ('17', '%d'), ('03', '%m'), ('3', '%m'),
            # '3' needed for when no leading zero.
            ('2', '%w'), ('10', '%I'))
        # Second probe date used only to tell %U and %W apart: its weekday
        # field is 6, and a '00' in the output is taken to mean %W.
        week_probe = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0))
        for offset, directive in ((0, '%c'), (1, '%x'), (2, '%X')):
            current_format = date_time[offset]
            for old, new in replacement_pairs:
                # Must deal with possible lack of locale info
                # manifesting itself as the empty string (e.g., Swedish's
                # lack of AM/PM info) or a platform returning a tuple of empty
                # strings (e.g., MacOS 9 having timezone as ('','')).
                if old:
                    current_format = current_format.replace(old, new)
            # A substring test is required here: str.find() returns -1
            # (true) when '00' is absent and 0 (false) when it is found at
            # the very start, so its result must not be used as a boolean.
            if '00' in time.strftime(directive, week_probe):
                U_W = '%W'
            else:
                U_W = '%U'
            # Any '11' still left over from the first date is replaced by
            # the week-number directive chosen above.
            date_time[offset] = current_format.replace('11', U_W)
        if not self.__LC_date_time:
            self.__LC_date_time = date_time[0]
        if not self.__LC_date:
            self.__LC_date = date_time[1]
        if not self.__LC_time:
            self.__LC_time = date_time[2]

    def __calc_timezone(self):
        # Set self.__timezone by using time.tzname.
        #
        # Empty string used for matching when timezone is not used/needed such
        # as with UTC.
        self.__timezone = self.__pad(time.tzname, 0)

    def __calc_lang(self):
        # Set self.__lang by using _getlang().
        self.__lang = _getlang()
293
Barry Warsaw35816e62002-08-29 16:24:50 +0000294
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000295
class TimeRE(dict):
    """Handle conversion from format directives to regexes.

    The dict maps a single directive character (the letter after '%') to a
    regex string with a named group of the same name.  Locale-independent
    directives are stored up front; locale-dependent ones are built from
    self.locale_time on first lookup and then stored.
    """

    def __init__(self, locale_time=None):
        """Init inst with non-locale regexes and store LocaleTime object.

        locale_time -- object supplying the locale-specific values; when
                       left out a new LocaleTime is created for this
                       instance.  (A default created in the signature would
                       be built once and shared by every instance.)
        """
        #XXX: Does 'Y' need to worry about having less or more than 4 digits?
        base = super(TimeRE, self)
        base.__init__({
            # The " \d" option is to make %c from ANSI C work
            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            # W is set below by using 'U'
            'y': r"(?P<y>\d\d)",
            'Y': r"(?P<Y>\d\d\d\d)"})
        # Same digits as 'U', but with its own group name so that %U and %W
        # can be used together in one format.
        base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
        if locale_time is None:
            locale_time = LocaleTime()
        self.locale_time = locale_time

    def __getitem__(self, fetch):
        """Try to fetch regex; if it does not exist, construct it.

        Raises KeyError when fetch is not a known directive.
        """
        try:
            return super(TimeRE, self).__getitem__(fetch)
        except KeyError:
            # Lambdas delay the locale lookups until the one directive that
            # is actually requested is known.
            constructors = {
                'A': lambda: self.__seqToRE(self.locale_time.f_weekday, fetch),
                'a': lambda: self.__seqToRE(self.locale_time.a_weekday, fetch),
                'B': lambda: self.__seqToRE(self.locale_time.f_month[1:],
                                            fetch),
                'b': lambda: self.__seqToRE(self.locale_time.a_month[1:],
                                            fetch),
                'c': lambda: self.pattern(self.locale_time.LC_date_time),
                'p': lambda: self.__seqToRE(self.locale_time.am_pm, fetch),
                'x': lambda: self.pattern(self.locale_time.LC_date),
                'X': lambda: self.pattern(self.locale_time.LC_time),
                'Z': lambda: self.__seqToRE(self.locale_time.timezone, fetch),
                '%': lambda: '%',
                }
            if fetch in constructors:
                self[fetch] = constructors[fetch]()
                return self[fetch]
            else:
                raise

    def __seqToRE(self, to_convert, directive):
        """Convert a sequence of names to a regex string for a directive.

        Returns '' when every name is empty.  Otherwise the names are tried
        longest first (in case names in the locale only differ by a suffix,
        the name with the suffix must get the chance to match first) and
        regex metacharacters inside the names are escaped.
        """
        from re import escape as re_escape

        to_convert = list(to_convert)  # Don't want to change value in-place.
        for value in to_convert:
            if value != '':
                break
        else:
            return ''
        # Stable sort: names of equal length keep their original order.
        to_convert.sort(key=len, reverse=True)
        regex = '|'.join([re_escape(value) for value in to_convert])
        return '(?P<%s>%s)' % (directive, regex)

    def pattern(self, format):
        """Return re pattern for the format string.

        Characters that might be interpreted as regex syntax are escaped,
        every run of whitespace is turned into r'\s*', and each '%x'
        directive is replaced by the regex stored for 'x'.

        Raises KeyError for an unknown directive and IndexError when the
        format ends with a lone '%'.
        """
        # The sub() call escapes all characters that might be misconstrued
        # as regex syntax.
        regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
        format = regex_chars.sub(r"\\\1", format)
        whitespace_replacement = re_compile(r'\s+')
        # The backslash is doubled because the replacement template itself
        # processes escapes; the text inserted is the two characters '\s'
        # followed by '*'.
        format = whitespace_replacement.sub(r'\\s*', format)
        pieces = []
        while '%' in format:
            directive_index = format.index('%') + 1
            pieces.append(format[:directive_index - 1])
            pieces.append(self[format[directive_index]])
            format = format[directive_index + 1:]
        pieces.append(format)
        return ''.join(pieces)

    def compile(self, format):
        """Return a compiled re object for the format string."""
        return re_compile(self.pattern(format), IGNORECASE)
400
# A single TimeRE is probably all that is ever needed, so one instance is
# created at import time and kept here for performance.
_locale_cache = TimeRE()
# Compiled regex objects keyed by format string; cached for the same reason.
_regex_cache = {}
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000405
def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return a time struct based on the input data and the format string.

    data_string -- text to parse; it is matched from its start against the
                   regex built from format
    format -- strftime-style format string

    Fields that the format does not supply default to 1900-01-01 00:00:00
    with a timezone flag of -1.  The weekday (Monday is 0) and the day of
    the year are calculated from the date when not given; when a day of the
    year is given, the month and day are recalculated from it.

    Raises ValueError when data_string does not match format.
    """
    global _locale_cache
    # If the language changes, caches are invalidated, so clear them.  The
    # replacement TimeRE is handed a brand-new LocaleTime so that no value
    # of the old language survives.
    if _locale_cache.locale_time.lang != _getlang():
        _locale_cache = TimeRE(LocaleTime())
        _regex_cache.clear()
    # Looked up only after the check above so it is never the stale object.
    locale_time = _locale_cache.locale_time
    format_regex = _regex_cache.get(format)
    if not format_regex:
        # Limit regex cache size to prevent major bloating of the module;
        # The value 5 is arbitrary
        if len(_regex_cache) > 5:
            _regex_cache.clear()
        format_regex = _locale_cache.compile(format)
        _regex_cache[format] = format_regex
    found = format_regex.match(data_string)
    if not found:
        raise ValueError("time data did not match format")
    year = 1900
    month = day = 1
    hour = minute = second = 0
    tz = -1
    # weekday and julian defaulted to -1 so as to signal need to calculate
    # values
    weekday = julian = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        if group_key == 'y':
            year = int(found_dict['y'])
            # Open Group specification for strptime() states that a %y
            # value in the range of [00, 68] is in the century 2000, while
            # [69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = _insensitiveindex(locale_time.f_month, found_dict['B'])
        elif group_key == 'b':
            month = _insensitiveindex(locale_time.a_month, found_dict['b'])
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            # Compared by value; an identity test against a string literal
            # only works by accident of string interning.
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0].lower()):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1].lower():
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'A':
            weekday = _insensitiveindex(locale_time.f_weekday,
                                        found_dict['A'])
        elif group_key == 'a':
            weekday = _insensitiveindex(locale_time.a_weekday,
                                        found_dict['a'])
        elif group_key == 'w':
            # %w counts from 0 upwards with 0 mapping to index 6 here; every
            # other value moves down by one.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key == 'Z':
            found_zone = found_dict['Z'].lower()
            if locale_time.timezone[0] == locale_time.timezone[1]:
                pass  # Deals with bad locale setup where timezone info is
                      # the same; first found on FreeBSD 4.4.
            elif locale_time.timezone[0].lower() == found_zone:
                tz = 0
            elif locale_time.timezone[1].lower() == found_zone:
                tz = 1
            elif locale_time.timezone[2].lower() == found_zone:
                tz = -1
    # Cannot pre-calculate datetime_date() since can change in Julian
    # calculation and thus could have different value for the day of the week
    # calculation
    if julian == -1:
        # Need to add 1 to result since first day of the year is 1, not 0.
        julian = datetime_date(year, month, day).toordinal() - \
                 datetime_date(year, 1, 1).toordinal() + 1
    else:
        # Assume that if they bothered to include Julian day it will
        # be accurate
        datetime_result = datetime_date.fromordinal(
            (julian - 1) + datetime_date(year, 1, 1).toordinal())
        year = datetime_result.year
        month = datetime_result.month
        day = datetime_result.day
    if weekday == -1:
        weekday = datetime_date(year, month, day).weekday()
    return time.struct_time((year, month, day,
                             hour, minute, second,
                             weekday, julian, tz))
Guido van Rossum00efe7e2002-07-19 17:04:46 +0000518
Barry Warsaw4d895fa2002-09-23 22:46:49 +0000519def _insensitiveindex(lst, findme):
520 # Perform a case-insensitive index search.
521
522 #XXX <bc>: If LocaleTime is not exposed, then consider removing this and
523 # just lowercase when LocaleTime sets its vars and lowercasing
524 # search values.
525 findme = findme.lower()
526 for key,item in enumerate(lst):
527 if item.lower() == findme:
528 return key
529 else:
530 raise ValueError("value not in list")
531