blob: bee84cbf8f9f422f226979c5766c473bcb9e5c99 [file] [log] [blame]
Paul Ganssle62972d92020-05-16 04:20:06 -04001#include "Python.h"
2#include "structmember.h"
3
4#include <ctype.h>
5#include <stddef.h>
6#include <stdint.h>
7
8#include "datetime.h"
9
10// Imports
11static PyObject *io_open = NULL;
12static PyObject *_tzpath_find_tzfile = NULL;
13static PyObject *_common_mod = NULL;
14
15typedef struct TransitionRuleType TransitionRuleType;
16typedef struct StrongCacheNode StrongCacheNode;
17
18typedef struct {
19 PyObject *utcoff;
20 PyObject *dstoff;
21 PyObject *tzname;
22 long utcoff_seconds;
23} _ttinfo;
24
25typedef struct {
26 _ttinfo std;
27 _ttinfo dst;
28 int dst_diff;
29 TransitionRuleType *start;
30 TransitionRuleType *end;
31 unsigned char std_only;
32} _tzrule;
33
34typedef struct {
35 PyDateTime_TZInfo base;
36 PyObject *key;
37 PyObject *file_repr;
38 PyObject *weakreflist;
Pablo Galindoe4799b92020-05-27 21:48:12 +010039 size_t num_transitions;
40 size_t num_ttinfos;
Paul Ganssle62972d92020-05-16 04:20:06 -040041 int64_t *trans_list_utc;
42 int64_t *trans_list_wall[2];
43 _ttinfo **trans_ttinfos; // References to the ttinfo for each transition
44 _ttinfo *ttinfo_before;
45 _tzrule tzrule_after;
46 _ttinfo *_ttinfos; // Unique array of ttinfos for ease of deallocation
47 unsigned char fixed_offset;
48 unsigned char source;
49} PyZoneInfo_ZoneInfo;
50
51struct TransitionRuleType {
52 int64_t (*year_to_timestamp)(TransitionRuleType *, int);
53};
54
55typedef struct {
56 TransitionRuleType base;
57 uint8_t month;
58 uint8_t week;
59 uint8_t day;
60 int8_t hour;
61 int8_t minute;
62 int8_t second;
63} CalendarRule;
64
65typedef struct {
66 TransitionRuleType base;
67 uint8_t julian;
68 unsigned int day;
69 int8_t hour;
70 int8_t minute;
71 int8_t second;
72} DayRule;
73
74struct StrongCacheNode {
75 StrongCacheNode *next;
76 StrongCacheNode *prev;
77 PyObject *key;
78 PyObject *zone;
79};
80
81static PyTypeObject PyZoneInfo_ZoneInfoType;
82
83// Globals
84static PyObject *TIMEDELTA_CACHE = NULL;
85static PyObject *ZONEINFO_WEAK_CACHE = NULL;
86static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL;
87static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8;
88
89static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0};
90
91// Constants
92static const int EPOCHORDINAL = 719163;
93static int DAYS_IN_MONTH[] = {
94 -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
95};
96
97static int DAYS_BEFORE_MONTH[] = {
98 -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
99};
100
101static const int SOURCE_NOCACHE = 0;
102static const int SOURCE_CACHE = 1;
103static const int SOURCE_FILE = 2;
104
105// Forward declarations
106static int
107load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj);
108static void
109utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
110 unsigned char *isdsts, size_t num_transitions,
111 size_t num_ttinfos);
112static int
113ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
114 int64_t *trans_local[2], size_t num_ttinfos,
115 size_t num_transitions);
116
117static int
118parse_tz_str(PyObject *tz_str_obj, _tzrule *out);
119
Pablo Galindoe4799b92020-05-27 21:48:12 +0100120static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400121parse_abbr(const char *const p, PyObject **abbr);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100122static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400123parse_tz_delta(const char *const p, long *total_seconds);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100124static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400125parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
126 int8_t *second);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100127static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400128parse_transition_rule(const char *const p, TransitionRuleType **out);
129
130static _ttinfo *
131find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year);
132static _ttinfo *
133find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
134 unsigned char *fold);
135
136static int
137build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out);
138static void
139xdecref_ttinfo(_ttinfo *ttinfo);
140static int
141ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1);
142
143static int
144build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
145 long dst_offset, TransitionRuleType *start,
146 TransitionRuleType *end, _tzrule *out);
147static void
148free_tzrule(_tzrule *tzrule);
149
150static PyObject *
151load_timedelta(long seconds);
152
153static int
154get_local_timestamp(PyObject *dt, int64_t *local_ts);
155static _ttinfo *
156find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt);
157
158static int
159ymd_to_ord(int y, int m, int d);
160static int
161is_leap_year(int year);
162
163static size_t
164_bisect(const int64_t value, const int64_t *arr, size_t size);
165
166static void
167eject_from_strong_cache(const PyTypeObject *const type, PyObject *key);
168static void
169clear_strong_cache(const PyTypeObject *const type);
170static void
171update_strong_cache(const PyTypeObject *const type, PyObject *key,
172 PyObject *zone);
173static PyObject *
174zone_from_strong_cache(const PyTypeObject *const type, PyObject *key);
175
176static PyObject *
177zoneinfo_new_instance(PyTypeObject *type, PyObject *key)
178{
179 PyObject *file_obj = NULL;
180 PyObject *file_path = NULL;
181
182 file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL);
183 if (file_path == NULL) {
184 return NULL;
185 }
186 else if (file_path == Py_None) {
187 file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key);
188 if (file_obj == NULL) {
189 Py_DECREF(file_path);
190 return NULL;
191 }
192 }
193
194 PyObject *self = (PyObject *)(type->tp_alloc(type, 0));
195 if (self == NULL) {
196 goto error;
197 }
198
199 if (file_obj == NULL) {
200 file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb");
201 if (file_obj == NULL) {
202 goto error;
203 }
204 }
205
206 if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) {
207 goto error;
208 }
209
210 PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL);
211 Py_DECREF(file_obj);
212 file_obj = NULL;
213 if (rv == NULL) {
214 goto error;
215 }
216 Py_DECREF(rv);
217
218 ((PyZoneInfo_ZoneInfo *)self)->key = key;
219 Py_INCREF(key);
220
221 goto cleanup;
222error:
223 Py_XDECREF(self);
224 self = NULL;
225cleanup:
226 if (file_obj != NULL) {
Zackery Spytzeca25492020-07-20 06:51:26 -0600227 PyObject *exc, *val, *tb;
228 PyErr_Fetch(&exc, &val, &tb);
Paul Ganssle62972d92020-05-16 04:20:06 -0400229 PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL);
Zackery Spytzeca25492020-07-20 06:51:26 -0600230 _PyErr_ChainExceptions(exc, val, tb);
231 if (tmp == NULL) {
232 Py_CLEAR(self);
233 }
234 Py_XDECREF(tmp);
Paul Ganssle62972d92020-05-16 04:20:06 -0400235 Py_DECREF(file_obj);
236 }
237 Py_DECREF(file_path);
238 return self;
239}
240
241static PyObject *
242get_weak_cache(PyTypeObject *type)
243{
244 if (type == &PyZoneInfo_ZoneInfoType) {
245 return ZONEINFO_WEAK_CACHE;
246 }
247 else {
248 PyObject *cache =
249 PyObject_GetAttrString((PyObject *)type, "_weak_cache");
250 // We are assuming that the type lives at least as long as the function
251 // that calls get_weak_cache, and that it holds a reference to the
252 // cache, so we'll return a "borrowed reference".
253 Py_XDECREF(cache);
254 return cache;
255 }
256}
257
258static PyObject *
259zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw)
260{
261 PyObject *key = NULL;
262 static char *kwlist[] = {"key", NULL};
263 if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) {
264 return NULL;
265 }
266
267 PyObject *instance = zone_from_strong_cache(type, key);
268 if (instance != NULL) {
269 return instance;
270 }
271
272 PyObject *weak_cache = get_weak_cache(type);
273 instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None);
274 if (instance == NULL) {
275 return NULL;
276 }
277
278 if (instance == Py_None) {
279 Py_DECREF(instance);
280 PyObject *tmp = zoneinfo_new_instance(type, key);
281 if (tmp == NULL) {
282 return NULL;
283 }
284
285 instance =
286 PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp);
Paul Ganssle62972d92020-05-16 04:20:06 -0400287 Py_DECREF(tmp);
Paul Ganssle62972d92020-05-16 04:20:06 -0400288 if (instance == NULL) {
289 return NULL;
290 }
Gregory P. Smithd780fa72020-06-22 00:39:28 -0700291 ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE;
Paul Ganssle62972d92020-05-16 04:20:06 -0400292 }
293
294 update_strong_cache(type, key, instance);
295 return instance;
296}
297
298static void
299zoneinfo_dealloc(PyObject *obj_self)
300{
301 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
302
303 if (self->weakreflist != NULL) {
304 PyObject_ClearWeakRefs(obj_self);
305 }
306
307 if (self->trans_list_utc != NULL) {
308 PyMem_Free(self->trans_list_utc);
309 }
310
311 for (size_t i = 0; i < 2; i++) {
312 if (self->trans_list_wall[i] != NULL) {
313 PyMem_Free(self->trans_list_wall[i]);
314 }
315 }
316
317 if (self->_ttinfos != NULL) {
318 for (size_t i = 0; i < self->num_ttinfos; ++i) {
319 xdecref_ttinfo(&(self->_ttinfos[i]));
320 }
321 PyMem_Free(self->_ttinfos);
322 }
323
324 if (self->trans_ttinfos != NULL) {
325 PyMem_Free(self->trans_ttinfos);
326 }
327
328 free_tzrule(&(self->tzrule_after));
329
330 Py_XDECREF(self->key);
331 Py_XDECREF(self->file_repr);
332
333 Py_TYPE(self)->tp_free((PyObject *)self);
334}
335
336static PyObject *
337zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs)
338{
339 PyObject *file_obj = NULL;
340 PyObject *file_repr = NULL;
341 PyObject *key = Py_None;
342 PyZoneInfo_ZoneInfo *self = NULL;
343
344 static char *kwlist[] = {"", "key", NULL};
345 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj,
346 &key)) {
347 return NULL;
348 }
349
350 PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0));
351 self = (PyZoneInfo_ZoneInfo *)obj_self;
352 if (self == NULL) {
353 return NULL;
354 }
355
356 file_repr = PyUnicode_FromFormat("%R", file_obj);
357 if (file_repr == NULL) {
358 goto error;
359 }
360
361 if (load_data(self, file_obj)) {
362 goto error;
363 }
364
365 self->source = SOURCE_FILE;
366 self->file_repr = file_repr;
367 self->key = key;
368 Py_INCREF(key);
369
370 return obj_self;
371error:
372 Py_XDECREF(file_repr);
373 Py_XDECREF(self);
374 return NULL;
375}
376
377static PyObject *
378zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
379{
380 static char *kwlist[] = {"key", NULL};
381 PyObject *key = NULL;
382 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) {
383 return NULL;
384 }
385
386 PyObject *out = zoneinfo_new_instance(cls, key);
387 if (out != NULL) {
388 ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE;
389 }
390
391 return out;
392}
393
394static PyObject *
395zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs)
396{
397 PyObject *only_keys = NULL;
398 static char *kwlist[] = {"only_keys", NULL};
399
400 if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist,
401 &only_keys))) {
402 return NULL;
403 }
404
405 PyTypeObject *type = (PyTypeObject *)cls;
406 PyObject *weak_cache = get_weak_cache(type);
407
408 if (only_keys == NULL || only_keys == Py_None) {
409 PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL);
410 if (rv != NULL) {
411 Py_DECREF(rv);
412 }
413
414 clear_strong_cache(type);
415 ZONEINFO_STRONG_CACHE = NULL;
416 }
417 else {
418 PyObject *item = NULL;
419 PyObject *pop = PyUnicode_FromString("pop");
420 if (pop == NULL) {
421 return NULL;
422 }
423
424 PyObject *iter = PyObject_GetIter(only_keys);
425 if (iter == NULL) {
426 Py_DECREF(pop);
427 return NULL;
428 }
429
430 while ((item = PyIter_Next(iter))) {
431 // Remove from strong cache
432 eject_from_strong_cache(type, item);
433
434 // Remove from weak cache
435 PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item,
436 Py_None, NULL);
437
438 Py_DECREF(item);
439 if (tmp == NULL) {
440 break;
441 }
442 Py_DECREF(tmp);
443 }
444 Py_DECREF(iter);
445 Py_DECREF(pop);
446 }
447
448 if (PyErr_Occurred()) {
449 return NULL;
450 }
451
452 Py_RETURN_NONE;
453}
454
455static PyObject *
456zoneinfo_utcoffset(PyObject *self, PyObject *dt)
457{
458 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
459 if (tti == NULL) {
460 return NULL;
461 }
462 Py_INCREF(tti->utcoff);
463 return tti->utcoff;
464}
465
466static PyObject *
467zoneinfo_dst(PyObject *self, PyObject *dt)
468{
469 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
470 if (tti == NULL) {
471 return NULL;
472 }
473 Py_INCREF(tti->dstoff);
474 return tti->dstoff;
475}
476
477static PyObject *
478zoneinfo_tzname(PyObject *self, PyObject *dt)
479{
480 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
481 if (tti == NULL) {
482 return NULL;
483 }
484 Py_INCREF(tti->tzname);
485 return tti->tzname;
486}
487
488#define HASTZINFO(p) (((_PyDateTime_BaseTZInfo *)(p))->hastzinfo)
489#define GET_DT_TZINFO(p) \
490 (HASTZINFO(p) ? ((PyDateTime_DateTime *)(p))->tzinfo : Py_None)
491
492static PyObject *
493zoneinfo_fromutc(PyObject *obj_self, PyObject *dt)
494{
495 if (!PyDateTime_Check(dt)) {
496 PyErr_SetString(PyExc_TypeError,
497 "fromutc: argument must be a datetime");
498 return NULL;
499 }
500 if (GET_DT_TZINFO(dt) != obj_self) {
501 PyErr_SetString(PyExc_ValueError,
502 "fromutc: dt.tzinfo "
503 "is not self");
504 return NULL;
505 }
506
507 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
508
509 int64_t timestamp;
510 if (get_local_timestamp(dt, &timestamp)) {
511 return NULL;
512 }
513 size_t num_trans = self->num_transitions;
514
515 _ttinfo *tti = NULL;
516 unsigned char fold = 0;
517
518 if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) {
519 tti = self->ttinfo_before;
520 }
521 else if (num_trans == 0 ||
522 timestamp > self->trans_list_utc[num_trans - 1]) {
523 tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp,
524 PyDateTime_GET_YEAR(dt), &fold);
525
526 // Immediately after the last manual transition, the fold/gap is
527 // between self->trans_ttinfos[num_transitions - 1] and whatever
528 // ttinfo applies immediately after the last transition, not between
529 // the STD and DST rules in the tzrule_after, so we may need to
530 // adjust the fold value.
531 if (num_trans) {
532 _ttinfo *tti_prev = NULL;
533 if (num_trans == 1) {
534 tti_prev = self->ttinfo_before;
535 }
536 else {
537 tti_prev = self->trans_ttinfos[num_trans - 2];
538 }
539 int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds;
540 if (diff > 0 &&
541 timestamp < (self->trans_list_utc[num_trans - 1] + diff)) {
542 fold = 1;
543 }
544 }
545 }
546 else {
547 size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans);
548 _ttinfo *tti_prev = NULL;
549
550 if (idx >= 2) {
551 tti_prev = self->trans_ttinfos[idx - 2];
552 tti = self->trans_ttinfos[idx - 1];
553 }
554 else {
555 tti_prev = self->ttinfo_before;
556 tti = self->trans_ttinfos[0];
557 }
558
559 // Detect fold
560 int64_t shift =
561 (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds);
562 if (shift > (timestamp - self->trans_list_utc[idx - 1])) {
563 fold = 1;
564 }
565 }
566
567 PyObject *tmp = PyNumber_Add(dt, tti->utcoff);
568 if (tmp == NULL) {
569 return NULL;
570 }
571
572 if (fold) {
573 if (PyDateTime_CheckExact(tmp)) {
574 ((PyDateTime_DateTime *)tmp)->fold = 1;
575 dt = tmp;
576 }
577 else {
578 PyObject *replace = PyObject_GetAttrString(tmp, "replace");
579 PyObject *args = PyTuple_New(0);
580 PyObject *kwargs = PyDict_New();
581
582 Py_DECREF(tmp);
583 if (args == NULL || kwargs == NULL || replace == NULL) {
584 Py_XDECREF(args);
585 Py_XDECREF(kwargs);
586 Py_XDECREF(replace);
587 return NULL;
588 }
589
590 dt = NULL;
591 if (!PyDict_SetItemString(kwargs, "fold", _PyLong_One)) {
592 dt = PyObject_Call(replace, args, kwargs);
593 }
594
595 Py_DECREF(args);
596 Py_DECREF(kwargs);
597 Py_DECREF(replace);
598
599 if (dt == NULL) {
600 return NULL;
601 }
602 }
603 }
604 else {
605 dt = tmp;
606 }
607 return dt;
608}
609
610static PyObject *
611zoneinfo_repr(PyZoneInfo_ZoneInfo *self)
612{
613 PyObject *rv = NULL;
614 const char *type_name = Py_TYPE((PyObject *)self)->tp_name;
615 if (!(self->key == Py_None)) {
616 rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key);
617 }
618 else {
619 assert(PyUnicode_Check(self->file_repr));
620 rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name,
621 self->file_repr);
622 }
623
624 return rv;
625}
626
627static PyObject *
628zoneinfo_str(PyZoneInfo_ZoneInfo *self)
629{
630 if (!(self->key == Py_None)) {
631 Py_INCREF(self->key);
632 return self->key;
633 }
634 else {
635 return zoneinfo_repr(self);
636 }
637}
638
639/* Pickles the ZoneInfo object by key and source.
640 *
641 * ZoneInfo objects are pickled by reference to the TZif file that they came
642 * from, which means that the exact transitions may be different or the file
643 * may not un-pickle if the data has changed on disk in the interim.
644 *
645 * It is necessary to include a bit indicating whether or not the object
646 * was constructed from the cache, because from-cache objects will hit the
647 * unpickling process's cache, whereas no-cache objects will bypass it.
648 *
649 * Objects constructed from ZoneInfo.from_file cannot be pickled.
650 */
651static PyObject *
652zoneinfo_reduce(PyObject *obj_self, PyObject *unused)
653{
654 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
655 if (self->source == SOURCE_FILE) {
656 // Objects constructed from files cannot be pickled.
657 PyObject *pickle = PyImport_ImportModule("pickle");
658 if (pickle == NULL) {
659 return NULL;
660 }
661
662 PyObject *pickle_error =
663 PyObject_GetAttrString(pickle, "PicklingError");
664 Py_DECREF(pickle);
665 if (pickle_error == NULL) {
666 return NULL;
667 }
668
669 PyErr_Format(pickle_error,
670 "Cannot pickle a ZoneInfo file from a file stream.");
671 Py_DECREF(pickle_error);
672 return NULL;
673 }
674
675 unsigned char from_cache = self->source == SOURCE_CACHE ? 1 : 0;
676 PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle");
677
678 if (constructor == NULL) {
679 return NULL;
680 }
681
682 PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache);
683 Py_DECREF(constructor);
684 return rv;
685}
686
687static PyObject *
688zoneinfo__unpickle(PyTypeObject *cls, PyObject *args)
689{
690 PyObject *key;
691 unsigned char from_cache;
692 if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) {
693 return NULL;
694 }
695
696 if (from_cache) {
697 PyObject *val_args = Py_BuildValue("(O)", key);
698 if (val_args == NULL) {
699 return NULL;
700 }
701
702 PyObject *rv = zoneinfo_new(cls, val_args, NULL);
703
704 Py_DECREF(val_args);
705 return rv;
706 }
707 else {
708 return zoneinfo_new_instance(cls, key);
709 }
710}
711
712/* It is relatively expensive to construct new timedelta objects, and in most
713 * cases we're looking at a relatively small number of timedeltas, such as
714 * integer number of hours, etc. We will keep a cache so that we construct
715 * a minimal number of these.
716 *
717 * Possibly this should be replaced with an LRU cache so that it's not possible
718 * for the memory usage to explode from this, but in order for this to be a
719 * serious problem, one would need to deliberately craft a malicious time zone
720 * file with many distinct offsets. As of tzdb 2019c, loading every single zone
721 * fills the cache with ~450 timedeltas for a total size of ~12kB.
722 *
723 * This returns a new reference to the timedelta.
724 */
725static PyObject *
726load_timedelta(long seconds)
727{
728 PyObject *rv = NULL;
729 PyObject *pyoffset = PyLong_FromLong(seconds);
730 if (pyoffset == NULL) {
731 return NULL;
732 }
733 int contains = PyDict_Contains(TIMEDELTA_CACHE, pyoffset);
734 if (contains == -1) {
735 goto error;
736 }
737
738 if (!contains) {
739 PyObject *tmp = PyDateTimeAPI->Delta_FromDelta(
740 0, seconds, 0, 1, PyDateTimeAPI->DeltaType);
741
742 if (tmp == NULL) {
743 goto error;
744 }
745
746 rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp);
747 Py_DECREF(tmp);
748 }
749 else {
750 rv = PyDict_GetItem(TIMEDELTA_CACHE, pyoffset);
751 }
752
753 Py_DECREF(pyoffset);
754 Py_INCREF(rv);
755 return rv;
756error:
757 Py_DECREF(pyoffset);
758 return NULL;
759}
760
761/* Constructor for _ttinfo object - this starts by initializing the _ttinfo
762 * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially
763 * initialized _ttinfo objects.
764 */
765static int
766build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out)
767{
768 out->utcoff = NULL;
769 out->dstoff = NULL;
770 out->tzname = NULL;
771
772 out->utcoff_seconds = utcoffset;
773 out->utcoff = load_timedelta(utcoffset);
774 if (out->utcoff == NULL) {
775 return -1;
776 }
777
778 out->dstoff = load_timedelta(dstoffset);
779 if (out->dstoff == NULL) {
780 return -1;
781 }
782
783 out->tzname = tzname;
784 Py_INCREF(tzname);
785
786 return 0;
787}
788
789/* Decrease reference count on any non-NULL members of a _ttinfo */
790static void
791xdecref_ttinfo(_ttinfo *ttinfo)
792{
793 if (ttinfo != NULL) {
794 Py_XDECREF(ttinfo->utcoff);
795 Py_XDECREF(ttinfo->dstoff);
796 Py_XDECREF(ttinfo->tzname);
797 }
798}
799
800/* Equality function for _ttinfo. */
801static int
802ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1)
803{
804 int rv;
805 if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) <
806 1) {
807 goto end;
808 }
809
810 if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) <
811 1) {
812 goto end;
813 }
814
815 if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) <
816 1) {
817 goto end;
818 }
819end:
820 return rv;
821}
822
823/* Given a file-like object, this populates a ZoneInfo object
824 *
825 * The current version calls into a Python function to read the data from
826 * file into Python objects, and this translates those Python objects into
827 * C values and calculates derived values (e.g. dstoff) in C.
828 *
829 * This returns 0 on success and -1 on failure.
830 *
831 * The function will never return while `self` is partially initialized —
832 * the object only needs to be freed / deallocated if this succeeds.
833 */
834static int
835load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj)
836{
837 PyObject *data_tuple = NULL;
838
839 long *utcoff = NULL;
840 long *dstoff = NULL;
841 size_t *trans_idx = NULL;
842 unsigned char *isdst = NULL;
843
844 self->trans_list_utc = NULL;
845 self->trans_list_wall[0] = NULL;
846 self->trans_list_wall[1] = NULL;
847 self->trans_ttinfos = NULL;
848 self->_ttinfos = NULL;
849 self->file_repr = NULL;
850
851 size_t ttinfos_allocated = 0;
852
853 data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj);
854
855 if (data_tuple == NULL) {
856 goto error;
857 }
858
859 if (!PyTuple_CheckExact(data_tuple)) {
860 PyErr_Format(PyExc_TypeError, "Invalid data result type: %r",
861 data_tuple);
862 goto error;
863 }
864
865 // Unpack the data tuple
866 PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0);
867 if (trans_idx_list == NULL) {
868 goto error;
869 }
870
871 PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1);
872 if (trans_utc == NULL) {
873 goto error;
874 }
875
876 PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2);
877 if (utcoff_list == NULL) {
878 goto error;
879 }
880
881 PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3);
882 if (isdst_list == NULL) {
883 goto error;
884 }
885
886 PyObject *abbr = PyTuple_GetItem(data_tuple, 4);
887 if (abbr == NULL) {
888 goto error;
889 }
890
891 PyObject *tz_str = PyTuple_GetItem(data_tuple, 5);
892 if (tz_str == NULL) {
893 goto error;
894 }
895
896 // Load the relevant sizes
897 Py_ssize_t num_transitions = PyTuple_Size(trans_utc);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100898 if (num_transitions < 0) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400899 goto error;
900 }
901
902 Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100903 if (num_ttinfos < 0) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400904 goto error;
905 }
906
907 self->num_transitions = (size_t)num_transitions;
908 self->num_ttinfos = (size_t)num_ttinfos;
909
910 // Load the transition indices and list
911 self->trans_list_utc =
912 PyMem_Malloc(self->num_transitions * sizeof(int64_t));
913 trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t));
914
Pablo Galindoe4799b92020-05-27 21:48:12 +0100915 for (size_t i = 0; i < self->num_transitions; ++i) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400916 PyObject *num = PyTuple_GetItem(trans_utc, i);
917 if (num == NULL) {
918 goto error;
919 }
920 self->trans_list_utc[i] = PyLong_AsLongLong(num);
921 if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) {
922 goto error;
923 }
924
925 num = PyTuple_GetItem(trans_idx_list, i);
926 if (num == NULL) {
927 goto error;
928 }
929
930 Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num);
931 if (cur_trans_idx == -1) {
932 goto error;
933 }
934
935 trans_idx[i] = (size_t)cur_trans_idx;
936 if (trans_idx[i] > self->num_ttinfos) {
937 PyErr_Format(
938 PyExc_ValueError,
939 "Invalid transition index found while reading TZif: %zd",
940 cur_trans_idx);
941
942 goto error;
943 }
944 }
945
946 // Load UTC offsets and isdst (size num_ttinfos)
947 utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long));
948 isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char));
949
950 if (utcoff == NULL || isdst == NULL) {
951 goto error;
952 }
Pablo Galindoe4799b92020-05-27 21:48:12 +0100953 for (size_t i = 0; i < self->num_ttinfos; ++i) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400954 PyObject *num = PyTuple_GetItem(utcoff_list, i);
955 if (num == NULL) {
956 goto error;
957 }
958
959 utcoff[i] = PyLong_AsLong(num);
960 if (utcoff[i] == -1 && PyErr_Occurred()) {
961 goto error;
962 }
963
964 num = PyTuple_GetItem(isdst_list, i);
965 if (num == NULL) {
966 goto error;
967 }
968
969 int isdst_with_error = PyObject_IsTrue(num);
970 if (isdst_with_error == -1) {
971 goto error;
972 }
973 else {
974 isdst[i] = (unsigned char)isdst_with_error;
975 }
976 }
977
978 dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long));
979 if (dstoff == NULL) {
980 goto error;
981 }
982
983 // Derive dstoff and trans_list_wall from the information we've loaded
984 utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions,
985 self->num_ttinfos);
986
987 if (ts_to_local(trans_idx, self->trans_list_utc, utcoff,
988 self->trans_list_wall, self->num_ttinfos,
989 self->num_transitions)) {
990 goto error;
991 }
992
993 // Build _ttinfo objects from utcoff, dstoff and abbr
994 self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo));
995 for (size_t i = 0; i < self->num_ttinfos; ++i) {
996 PyObject *tzname = PyTuple_GetItem(abbr, i);
997 if (tzname == NULL) {
998 goto error;
999 }
1000
1001 ttinfos_allocated++;
1002 if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) {
1003 goto error;
1004 }
1005 }
1006
1007 // Build our mapping from transition to the ttinfo that applies
1008 self->trans_ttinfos =
1009 PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *));
1010 for (size_t i = 0; i < self->num_transitions; ++i) {
1011 size_t ttinfo_idx = trans_idx[i];
1012 assert(ttinfo_idx < self->num_ttinfos);
1013 self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]);
1014 }
1015
1016 // Set ttinfo_before to the first non-DST transition
1017 for (size_t i = 0; i < self->num_ttinfos; ++i) {
1018 if (!isdst[i]) {
1019 self->ttinfo_before = &(self->_ttinfos[i]);
1020 break;
1021 }
1022 }
1023
1024 // If there are only DST ttinfos, pick the first one, if there are no
1025 // ttinfos at all, set ttinfo_before to NULL
1026 if (self->ttinfo_before == NULL && self->num_ttinfos > 0) {
1027 self->ttinfo_before = &(self->_ttinfos[0]);
1028 }
1029
1030 if (tz_str != Py_None && PyObject_IsTrue(tz_str)) {
1031 if (parse_tz_str(tz_str, &(self->tzrule_after))) {
1032 goto error;
1033 }
1034 }
1035 else {
1036 if (!self->num_ttinfos) {
1037 PyErr_Format(PyExc_ValueError, "No time zone information found.");
1038 goto error;
1039 }
1040
1041 size_t idx;
1042 if (!self->num_transitions) {
1043 idx = self->num_ttinfos - 1;
1044 }
1045 else {
1046 idx = trans_idx[self->num_transitions - 1];
1047 }
1048
1049 _ttinfo *tti = &(self->_ttinfos[idx]);
1050 build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL,
1051 &(self->tzrule_after));
1052
1053 // We've abused the build_tzrule constructor to construct an STD-only
1054 // rule mimicking whatever ttinfo we've picked up, but it's possible
1055 // that the one we've picked up is a DST zone, so we need to make sure
1056 // that the dstoff is set correctly in that case.
1057 if (PyObject_IsTrue(tti->dstoff)) {
1058 _ttinfo *tti_after = &(self->tzrule_after.std);
1059 Py_DECREF(tti_after->dstoff);
1060 tti_after->dstoff = tti->dstoff;
1061 Py_INCREF(tti_after->dstoff);
1062 }
1063 }
1064
1065 // Determine if this is a "fixed offset" zone, meaning that the output of
1066 // the utcoffset, dst and tzname functions does not depend on the specific
1067 // datetime passed.
1068 //
1069 // We make three simplifying assumptions here:
1070 //
1071 // 1. If tzrule_after is not std_only, it has transitions that might occur
1072 // (it is possible to construct TZ strings that specify STD and DST but
1073 // no transitions ever occur, such as AAA0BBB,0/0,J365/25).
1074 // 2. If self->_ttinfos contains more than one _ttinfo object, the objects
1075 // represent different offsets.
1076 // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise
1077 // fixed-offset zone with extra _ttinfos defined may appear to *not* be
1078 // a fixed offset zone).
1079 //
1080 // Violations to these assumptions would be fairly exotic, and exotic
1081 // zones should almost certainly not be used with datetime.time (the
1082 // only thing that would be affected by this).
1083 if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) {
1084 self->fixed_offset = 0;
1085 }
1086 else if (self->num_ttinfos == 0) {
1087 self->fixed_offset = 1;
1088 }
1089 else {
1090 int constant_offset =
1091 ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std);
1092 if (constant_offset < 0) {
1093 goto error;
1094 }
1095 else {
1096 self->fixed_offset = constant_offset;
1097 }
1098 }
1099
1100 int rv = 0;
1101 goto cleanup;
1102error:
1103 // These resources only need to be freed if we have failed, if we succeed
1104 // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc
1105 // method to free the relevant resources.
1106 if (self->trans_list_utc != NULL) {
1107 PyMem_Free(self->trans_list_utc);
1108 self->trans_list_utc = NULL;
1109 }
1110
1111 for (size_t i = 0; i < 2; ++i) {
1112 if (self->trans_list_wall[i] != NULL) {
1113 PyMem_Free(self->trans_list_wall[i]);
1114 self->trans_list_wall[i] = NULL;
1115 }
1116 }
1117
1118 if (self->_ttinfos != NULL) {
1119 for (size_t i = 0; i < ttinfos_allocated; ++i) {
1120 xdecref_ttinfo(&(self->_ttinfos[i]));
1121 }
1122 PyMem_Free(self->_ttinfos);
1123 self->_ttinfos = NULL;
1124 }
1125
1126 if (self->trans_ttinfos != NULL) {
1127 PyMem_Free(self->trans_ttinfos);
1128 self->trans_ttinfos = NULL;
1129 }
1130
1131 rv = -1;
1132cleanup:
1133 Py_XDECREF(data_tuple);
1134
1135 if (utcoff != NULL) {
1136 PyMem_Free(utcoff);
1137 }
1138
1139 if (dstoff != NULL) {
1140 PyMem_Free(dstoff);
1141 }
1142
1143 if (isdst != NULL) {
1144 PyMem_Free(isdst);
1145 }
1146
1147 if (trans_idx != NULL) {
1148 PyMem_Free(trans_idx);
1149 }
1150
1151 return rv;
1152}
1153
1154/* Function to calculate the local timestamp of a transition from the year. */
1155int64_t
1156calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1157{
1158 CalendarRule *self = (CalendarRule *)base_self;
1159
1160 // We want (year, month, day of month); we have year and month, but we
1161 // need to turn (week, day-of-week) into day-of-month
1162 //
1163 // Week 1 is the first week in which day `day` (where 0 = Sunday) appears.
1164 // Week 5 represents the last occurrence of day `day`, so we need to know
1165 // the first weekday of the month and the number of days in the month.
1166 int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7;
1167 uint8_t days_in_month = DAYS_IN_MONTH[self->month];
1168 if (self->month == 2 && is_leap_year(year)) {
1169 days_in_month += 1;
1170 }
1171
1172 // This equation seems magical, so I'll break it down:
1173 // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day
1174 // + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent
1175 // because this math is mod 7
1176 // 2. Get first day - desired day mod 7 (adjusting by 7 for negative
1177 // numbers so that -1 % 7 = 6).
1178 // 3. Add 1 because month days are a 1-based index.
1179 int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7;
1180 if (month_day < 0) {
1181 month_day += 7;
1182 }
1183 month_day += 1;
1184
1185 // Now use a 0-based index version of `week` to calculate the w-th
1186 // occurrence of `day`
1187 month_day += ((int8_t)(self->week) - 1) * 7;
1188
1189 // month_day will only be > days_in_month if w was 5, and `w` means "last
1190 // occurrence of `d`", so now we just check if we over-shot the end of the
1191 // month and if so knock off 1 week.
1192 if (month_day > days_in_month) {
1193 month_day -= 7;
1194 }
1195
1196 int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL;
1197 return ((ordinal * 86400) + (int64_t)(self->hour * 3600) +
1198 (int64_t)(self->minute * 60) + (int64_t)(self->second));
1199}
1200
1201/* Constructor for CalendarRule. */
1202int
1203calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour,
1204 int8_t minute, int8_t second, CalendarRule *out)
1205{
1206 // These bounds come from the POSIX standard, which describes an Mm.n.d
1207 // rule as:
1208 //
1209 // The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <=
1210 // 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which
1211 // may occur in either the fourth or the fifth week). Week 1 is the first
1212 // week in which the d'th day occurs. Day zero is Sunday.
1213 if (month <= 0 || month > 12) {
1214 PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]");
1215 return -1;
1216 }
1217
1218 if (week <= 0 || week > 5) {
1219 PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]");
1220 return -1;
1221 }
1222
1223 // day is an unsigned integer, so day < 0 should always return false, but
1224 // if day's type changes to a signed integer *without* changing this value,
1225 // it may create a bug. Considering that the compiler should be able to
1226 // optimize out the first comparison if day is an unsigned integer anyway,
1227 // we will leave this comparison in place and disable the compiler warning.
1228#pragma GCC diagnostic push
1229#pragma GCC diagnostic ignored "-Wtype-limits"
1230 if (day < 0 || day > 6) {
1231#pragma GCC diagnostic pop
1232 PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]");
1233 return -1;
1234 }
1235
1236 TransitionRuleType base = {&calendarrule_year_to_timestamp};
1237
1238 CalendarRule new_offset = {
1239 .base = base,
1240 .month = month,
1241 .week = week,
1242 .day = day,
1243 .hour = hour,
1244 .minute = minute,
1245 .second = second,
1246 };
1247
1248 *out = new_offset;
1249 return 0;
1250}
1251
1252/* Function to calculate the local timestamp of a transition from the year.
1253 *
1254 * This translates the day of the year into a local timestamp — either a
1255 * 1-based Julian day, not including leap days, or the 0-based year-day,
1256 * including leap days.
1257 * */
1258int64_t
1259dayrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1260{
1261 // The function signature requires a TransitionRuleType pointer, but this
1262 // function is only applicable to DayRule* objects.
1263 DayRule *self = (DayRule *)base_self;
1264
1265 // ymd_to_ord calculates the number of days since 0001-01-01, but we want
1266 // to know the number of days since 1970-01-01, so we must subtract off
1267 // the equivalent of ymd_to_ord(1970, 1, 1).
1268 //
1269 // We subtract off an additional 1 day to account for January 1st (we want
1270 // the number of full days *before* the date of the transition - partial
1271 // days are accounted for in the hour, minute and second portions.
1272 int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1;
1273
1274 // The Julian day specification skips over February 29th in leap years,
1275 // from the POSIX standard:
1276 //
1277 // Leap days shall not be counted. That is, in all years-including leap
1278 // years-February 28 is day 59 and March 1 is day 60. It is impossible to
1279 // refer explicitly to the occasional February 29.
1280 //
1281 // This is actually more useful than you'd think — if you want a rule that
1282 // always transitions on a given calendar day (other than February 29th),
1283 // you would use a Julian day, e.g. J91 always refers to April 1st and J365
1284 // always refers to December 31st.
1285 unsigned int day = self->day;
1286 if (self->julian && day >= 59 && is_leap_year(year)) {
1287 day += 1;
1288 }
1289
1290 return ((days_before_year + day) * 86400) + (self->hour * 3600) +
1291 (self->minute * 60) + self->second;
1292}
1293
1294/* Constructor for DayRule. */
1295static int
1296dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute,
1297 int8_t second, DayRule *out)
1298{
1299 // The POSIX standard specifies that Julian days must be in the range (1 <=
1300 // n <= 365) and that non-Julian (they call it "0-based Julian") days must
1301 // be in the range (0 <= n <= 365).
1302 if (day < julian || day > 365) {
1303 PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u",
1304 julian, day);
1305 return -1;
1306 }
1307
1308 TransitionRuleType base = {
1309 &dayrule_year_to_timestamp,
1310 };
1311
1312 DayRule tmp = {
1313 .base = base,
1314 .julian = julian,
1315 .day = day,
1316 .hour = hour,
1317 .minute = minute,
1318 .second = second,
1319 };
1320
1321 *out = tmp;
1322
1323 return 0;
1324}
1325
1326/* Calculate the start and end rules for a _tzrule in the given year. */
1327static void
1328tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end)
1329{
1330 assert(rule->start != NULL);
1331 assert(rule->end != NULL);
1332 *start = rule->start->year_to_timestamp(rule->start, year);
1333 *end = rule->end->year_to_timestamp(rule->end, year);
1334}
1335
1336/* Calculate the _ttinfo that applies at a given local time from a _tzrule.
1337 *
1338 * This takes a local timestamp and fold for disambiguation purposes; the year
1339 * could technically be calculated from the timestamp, but given that the
1340 * callers of this function already have the year information accessible from
1341 * the datetime struct, it is taken as an additional parameter to reduce
1342 * unncessary calculation.
1343 * */
1344static _ttinfo *
1345find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year)
1346{
1347 if (rule->std_only) {
1348 return &(rule->std);
1349 }
1350
1351 int64_t start, end;
1352 uint8_t isdst;
1353
1354 tzrule_transitions(rule, year, &start, &end);
1355
1356 // With fold = 0, the period (denominated in local time) with the smaller
1357 // offset starts at the end of the gap and ends at the end of the fold;
1358 // with fold = 1, it runs from the start of the gap to the beginning of the
1359 // fold.
1360 //
1361 // So in order to determine the DST boundaries we need to know both the
1362 // fold and whether DST is positive or negative (rare), and it turns out
1363 // that this boils down to fold XOR is_positive.
1364 if (fold == (rule->dst_diff >= 0)) {
1365 end -= rule->dst_diff;
1366 }
1367 else {
1368 start += rule->dst_diff;
1369 }
1370
1371 if (start < end) {
1372 isdst = (ts >= start) && (ts < end);
1373 }
1374 else {
1375 isdst = (ts < end) || (ts >= start);
1376 }
1377
1378 if (isdst) {
1379 return &(rule->dst);
1380 }
1381 else {
1382 return &(rule->std);
1383 }
1384}
1385
1386/* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time.
1387 *
1388 * This function can determine the _ttinfo that applies at a given epoch time,
1389 * (analogous to trans_list_utc), and whether or not the datetime is in a fold.
1390 * This is to be used in the .fromutc() function.
1391 *
1392 * The year is technically a redundant parameter, because it can be calculated
1393 * from the timestamp, but all callers of this function should have the year
1394 * in the datetime struct anyway, so taking it as a parameter saves unnecessary
1395 * calculation.
1396 **/
1397static _ttinfo *
1398find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
1399 unsigned char *fold)
1400{
1401 if (rule->std_only) {
1402 *fold = 0;
1403 return &(rule->std);
1404 }
1405
1406 int64_t start, end;
1407 uint8_t isdst;
1408 tzrule_transitions(rule, year, &start, &end);
1409 start -= rule->std.utcoff_seconds;
1410 end -= rule->dst.utcoff_seconds;
1411
1412 if (start < end) {
1413 isdst = (ts >= start) && (ts < end);
1414 }
1415 else {
1416 isdst = (ts < end) || (ts >= start);
1417 }
1418
1419 // For positive DST, the ambiguous period is one dst_diff after the end of
1420 // DST; for negative DST, the ambiguous period is one dst_diff before the
1421 // start of DST.
1422 int64_t ambig_start, ambig_end;
1423 if (rule->dst_diff > 0) {
1424 ambig_start = end;
1425 ambig_end = end + rule->dst_diff;
1426 }
1427 else {
1428 ambig_start = start;
1429 ambig_end = start - rule->dst_diff;
1430 }
1431
1432 *fold = (ts >= ambig_start) && (ts < ambig_end);
1433
1434 if (isdst) {
1435 return &(rule->dst);
1436 }
1437 else {
1438 return &(rule->std);
1439 }
1440}
1441
1442/* Parse a TZ string in the format specified by the POSIX standard:
1443 *
1444 * std offset[dst[offset],start[/time],end[/time]]
1445 *
1446 * std and dst must be 3 or more characters long and must not contain a
1447 * leading colon, embedded digits, commas, nor a plus or minus signs; The
1448 * spaces between "std" and "offset" are only for display and are not actually
1449 * present in the string.
1450 *
1451 * The format of the offset is ``[+|-]hh[:mm[:ss]]``
1452 *
1453 * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3:
1454 *
1455 * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
1456 */
1457static int
1458parse_tz_str(PyObject *tz_str_obj, _tzrule *out)
1459{
1460 PyObject *std_abbr = NULL;
1461 PyObject *dst_abbr = NULL;
1462 TransitionRuleType *start = NULL;
1463 TransitionRuleType *end = NULL;
Dong-hee Naa487a392020-05-22 01:56:03 +09001464 // Initialize offsets to invalid value (> 24 hours)
1465 long std_offset = 1 << 20;
1466 long dst_offset = 1 << 20;
Paul Ganssle62972d92020-05-16 04:20:06 -04001467
1468 char *tz_str = PyBytes_AsString(tz_str_obj);
1469 if (tz_str == NULL) {
1470 return -1;
1471 }
1472 char *p = tz_str;
1473
1474 // Read the `std` abbreviation, which must be at least 3 characters long.
Pablo Galindoe4799b92020-05-27 21:48:12 +01001475 Py_ssize_t num_chars = parse_abbr(p, &std_abbr);
Paul Ganssle62972d92020-05-16 04:20:06 -04001476 if (num_chars < 1) {
1477 PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
1478 goto error;
1479 }
1480
1481 p += num_chars;
1482
1483 // Now read the STD offset, which is required
1484 num_chars = parse_tz_delta(p, &std_offset);
1485 if (num_chars < 0) {
1486 PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj);
1487 goto error;
1488 }
1489 p += num_chars;
1490
1491 // If the string ends here, there is no DST, otherwise we must parse the
1492 // DST abbreviation and start and end dates and times.
1493 if (*p == '\0') {
1494 goto complete;
1495 }
1496
1497 num_chars = parse_abbr(p, &dst_abbr);
1498 if (num_chars < 1) {
1499 PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
1500 goto error;
1501 }
1502 p += num_chars;
1503
1504 if (*p == ',') {
1505 // From the POSIX standard:
1506 //
1507 // If no offset follows dst, the alternative time is assumed to be one
1508 // hour ahead of standard time.
1509 dst_offset = std_offset + 3600;
1510 }
1511 else {
1512 num_chars = parse_tz_delta(p, &dst_offset);
1513 if (num_chars < 0) {
1514 PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R",
1515 tz_str_obj);
1516 goto error;
1517 }
1518
1519 p += num_chars;
1520 }
1521
1522 TransitionRuleType **transitions[2] = {&start, &end};
1523 for (size_t i = 0; i < 2; ++i) {
1524 if (*p != ',') {
1525 PyErr_Format(PyExc_ValueError,
1526 "Missing transition rules in TZ string: %R",
1527 tz_str_obj);
1528 goto error;
1529 }
1530 p++;
1531
1532 num_chars = parse_transition_rule(p, transitions[i]);
1533 if (num_chars < 0) {
1534 PyErr_Format(PyExc_ValueError,
1535 "Malformed transition rule in TZ string: %R",
1536 tz_str_obj);
1537 goto error;
1538 }
1539 p += num_chars;
1540 }
1541
1542 if (*p != '\0') {
1543 PyErr_Format(PyExc_ValueError,
1544 "Extraneous characters at end of TZ string: %R",
1545 tz_str_obj);
1546 goto error;
1547 }
1548
1549complete:
1550 build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out);
1551 Py_DECREF(std_abbr);
1552 Py_XDECREF(dst_abbr);
1553
1554 return 0;
1555error:
1556 Py_XDECREF(std_abbr);
1557 if (dst_abbr != NULL && dst_abbr != Py_None) {
1558 Py_DECREF(dst_abbr);
1559 }
1560
1561 if (start != NULL) {
1562 PyMem_Free(start);
1563 }
1564
1565 if (end != NULL) {
1566 PyMem_Free(end);
1567 }
1568
1569 return -1;
1570}
1571
Pablo Galindoe4799b92020-05-27 21:48:12 +01001572static int
1573parse_uint(const char *const p, uint8_t *value)
Paul Ganssle62972d92020-05-16 04:20:06 -04001574{
1575 if (!isdigit(*p)) {
1576 return -1;
1577 }
1578
Pablo Galindoe4799b92020-05-27 21:48:12 +01001579 *value = (*p) - '0';
1580 return 0;
Paul Ganssle62972d92020-05-16 04:20:06 -04001581}
1582
1583/* Parse the STD and DST abbreviations from a TZ string. */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001584static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001585parse_abbr(const char *const p, PyObject **abbr)
1586{
1587 const char *ptr = p;
1588 char buff = *ptr;
1589 const char *str_start;
1590 const char *str_end;
1591
1592 if (*ptr == '<') {
1593 ptr++;
1594 str_start = ptr;
1595 while ((buff = *ptr) != '>') {
1596 // From the POSIX standard:
1597 //
1598 // In the quoted form, the first character shall be the less-than
1599 // ( '<' ) character and the last character shall be the
1600 // greater-than ( '>' ) character. All characters between these
1601 // quoting characters shall be alphanumeric characters from the
1602 // portable character set in the current locale, the plus-sign (
1603 // '+' ) character, or the minus-sign ( '-' ) character. The std
1604 // and dst fields in this case shall not include the quoting
1605 // characters.
1606 if (!isalpha(buff) && !isdigit(buff) && buff != '+' &&
1607 buff != '-') {
1608 return -1;
1609 }
1610 ptr++;
1611 }
1612 str_end = ptr;
1613 ptr++;
1614 }
1615 else {
1616 str_start = p;
1617 // From the POSIX standard:
1618 //
1619 // In the unquoted form, all characters in these fields shall be
1620 // alphabetic characters from the portable character set in the
1621 // current locale.
1622 while (isalpha(*ptr)) {
1623 ptr++;
1624 }
1625 str_end = ptr;
1626 }
1627
1628 *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start);
Gregory P. Smithd780fa72020-06-22 00:39:28 -07001629 if (*abbr == NULL) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001630 return -1;
1631 }
1632
1633 return ptr - p;
1634}
1635
1636/* Parse a UTC offset from a TZ str. */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001637static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001638parse_tz_delta(const char *const p, long *total_seconds)
1639{
1640 // From the POSIX spec:
1641 //
1642 // Indicates the value added to the local time to arrive at Coordinated
1643 // Universal Time. The offset has the form:
1644 //
1645 // hh[:mm[:ss]]
1646 //
1647 // One or more digits may be used; the value is always interpreted as a
1648 // decimal number.
1649 //
1650 // The POSIX spec says that the values for `hour` must be between 0 and 24
1651 // hours, but RFC 8536 §3.3.1 specifies that the hours part of the
1652 // transition times may be signed and range from -167 to 167.
1653 long sign = -1;
1654 long hours = 0;
1655 long minutes = 0;
1656 long seconds = 0;
1657
1658 const char *ptr = p;
1659 char buff = *ptr;
1660 if (buff == '-' || buff == '+') {
1661 // Negative numbers correspond to *positive* offsets, from the spec:
1662 //
1663 // If preceded by a '-', the timezone shall be east of the Prime
1664 // Meridian; otherwise, it shall be west (which may be indicated by
1665 // an optional preceding '+' ).
1666 if (buff == '-') {
1667 sign = 1;
1668 }
1669
1670 ptr++;
1671 }
1672
1673 // The hour can be 1 or 2 numeric characters
1674 for (size_t i = 0; i < 2; ++i) {
1675 buff = *ptr;
1676 if (!isdigit(buff)) {
1677 if (i == 0) {
1678 return -1;
1679 }
1680 else {
1681 break;
1682 }
1683 }
1684
1685 hours *= 10;
1686 hours += buff - '0';
1687 ptr++;
1688 }
1689
1690 if (hours > 24 || hours < 0) {
1691 return -1;
1692 }
1693
1694 // Minutes and seconds always of the format ":dd"
1695 long *outputs[2] = {&minutes, &seconds};
1696 for (size_t i = 0; i < 2; ++i) {
1697 if (*ptr != ':') {
1698 goto complete;
1699 }
1700 ptr++;
1701
1702 for (size_t j = 0; j < 2; ++j) {
1703 buff = *ptr;
1704 if (!isdigit(buff)) {
1705 return -1;
1706 }
1707 *(outputs[i]) *= 10;
1708 *(outputs[i]) += buff - '0';
1709 ptr++;
1710 }
1711 }
1712
1713complete:
1714 *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds);
1715
1716 return ptr - p;
1717}
1718
1719/* Parse the date portion of a transition rule. */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001720static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001721parse_transition_rule(const char *const p, TransitionRuleType **out)
1722{
1723 // The full transition rule indicates when to change back and forth between
1724 // STD and DST, and has the form:
1725 //
1726 // date[/time],date[/time]
1727 //
1728 // This function parses an individual date[/time] section, and returns
1729 // the number of characters that contributed to the transition rule. This
1730 // does not include the ',' at the end of the first rule.
1731 //
1732 // The POSIX spec states that if *time* is not given, the default is 02:00.
1733 const char *ptr = p;
1734 int8_t hour = 2;
1735 int8_t minute = 0;
1736 int8_t second = 0;
1737
1738 // Rules come in one of three flavors:
1739 //
1740 // 1. Jn: Julian day n, with no leap days.
1741 // 2. n: Day of year (0-based, with leap days)
1742 // 3. Mm.n.d: Specifying by month, week and day-of-week.
1743
1744 if (*ptr == 'M') {
1745 uint8_t month, week, day;
1746 ptr++;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001747 if (parse_uint(ptr, &month)) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001748 return -1;
1749 }
Paul Ganssle62972d92020-05-16 04:20:06 -04001750 ptr++;
1751 if (*ptr != '.') {
Pablo Galindoe4799b92020-05-27 21:48:12 +01001752 uint8_t tmp;
1753 if (parse_uint(ptr, &tmp)) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001754 return -1;
1755 }
1756
1757 month *= 10;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001758 month += tmp;
Paul Ganssle62972d92020-05-16 04:20:06 -04001759 ptr++;
1760 }
1761
1762 uint8_t *values[2] = {&week, &day};
1763 for (size_t i = 0; i < 2; ++i) {
1764 if (*ptr != '.') {
1765 return -1;
1766 }
1767 ptr++;
1768
Pablo Galindoe4799b92020-05-27 21:48:12 +01001769 if (parse_uint(ptr, values[i])) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001770 return -1;
1771 }
1772 ptr++;
Paul Ganssle62972d92020-05-16 04:20:06 -04001773 }
1774
1775 if (*ptr == '/') {
1776 ptr++;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001777 Py_ssize_t num_chars =
Paul Ganssle62972d92020-05-16 04:20:06 -04001778 parse_transition_time(ptr, &hour, &minute, &second);
1779 if (num_chars < 0) {
1780 return -1;
1781 }
1782 ptr += num_chars;
1783 }
1784
1785 CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
1786 if (rv == NULL) {
1787 return -1;
1788 }
1789
1790 if (calendarrule_new(month, week, day, hour, minute, second, rv)) {
1791 PyMem_Free(rv);
1792 return -1;
1793 }
1794
1795 *out = (TransitionRuleType *)rv;
1796 }
1797 else {
1798 uint8_t julian = 0;
1799 unsigned int day = 0;
1800 if (*ptr == 'J') {
1801 julian = 1;
1802 ptr++;
1803 }
1804
1805 for (size_t i = 0; i < 3; ++i) {
1806 if (!isdigit(*ptr)) {
1807 if (i == 0) {
1808 return -1;
1809 }
1810 break;
1811 }
1812 day *= 10;
1813 day += (*ptr) - '0';
1814 ptr++;
1815 }
1816
1817 if (*ptr == '/') {
1818 ptr++;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001819 Py_ssize_t num_chars =
Paul Ganssle62972d92020-05-16 04:20:06 -04001820 parse_transition_time(ptr, &hour, &minute, &second);
1821 if (num_chars < 0) {
1822 return -1;
1823 }
1824 ptr += num_chars;
1825 }
1826
1827 DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
1828 if (rv == NULL) {
1829 return -1;
1830 }
1831
1832 if (dayrule_new(julian, day, hour, minute, second, rv)) {
1833 PyMem_Free(rv);
1834 return -1;
1835 }
1836 *out = (TransitionRuleType *)rv;
1837 }
1838
1839 return ptr - p;
1840}
1841
1842/* Parse the time portion of a transition rule (e.g. following an /) */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001843static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001844parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
1845 int8_t *second)
1846{
1847 // From the spec:
1848 //
1849 // The time has the same format as offset except that no leading sign
1850 // ( '-' or '+' ) is allowed.
1851 //
1852 // The format for the offset is:
1853 //
1854 // h[h][:mm[:ss]]
1855 //
1856 // RFC 8536 also allows transition times to be signed and to range from
1857 // -167 to +167, but the current version only supports [0, 99].
1858 //
1859 // TODO: Support the full range of transition hours.
1860 int8_t *components[3] = {hour, minute, second};
1861 const char *ptr = p;
1862 int8_t sign = 1;
1863
1864 if (*ptr == '-' || *ptr == '+') {
1865 if (*ptr == '-') {
1866 sign = -1;
1867 }
1868 ptr++;
1869 }
1870
1871 for (size_t i = 0; i < 3; ++i) {
1872 if (i > 0) {
1873 if (*ptr != ':') {
1874 break;
1875 }
1876 ptr++;
1877 }
1878
1879 uint8_t buff = 0;
1880 for (size_t j = 0; j < 2; j++) {
1881 if (!isdigit(*ptr)) {
1882 if (i == 0 && j > 0) {
1883 break;
1884 }
1885 return -1;
1886 }
1887
1888 buff *= 10;
1889 buff += (*ptr) - '0';
1890 ptr++;
1891 }
1892
1893 *(components[i]) = sign * buff;
1894 }
1895
1896 return ptr - p;
1897}
1898
1899/* Constructor for a _tzrule.
1900 *
1901 * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which
1902 * case `dst_offset` will be ignored and `start` and `end` are expected to be
1903 * NULL as well.
1904 *
1905 * Returns 0 on success.
1906 */
1907static int
1908build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
1909 long dst_offset, TransitionRuleType *start,
1910 TransitionRuleType *end, _tzrule *out)
1911{
Dong-hee Naa487a392020-05-22 01:56:03 +09001912 _tzrule rv = {{0}};
Paul Ganssle62972d92020-05-16 04:20:06 -04001913
1914 rv.start = start;
1915 rv.end = end;
1916
1917 if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) {
1918 goto error;
1919 }
1920
1921 if (dst_abbr != NULL) {
1922 rv.dst_diff = dst_offset - std_offset;
1923 if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) {
1924 goto error;
1925 }
1926 }
1927 else {
1928 rv.std_only = 1;
1929 }
1930
1931 *out = rv;
1932
1933 return 0;
1934error:
1935 xdecref_ttinfo(&rv.std);
1936 xdecref_ttinfo(&rv.dst);
1937 return -1;
1938}
1939
1940/* Destructor for _tzrule. */
1941static void
1942free_tzrule(_tzrule *tzrule)
1943{
1944 xdecref_ttinfo(&(tzrule->std));
1945 if (!tzrule->std_only) {
1946 xdecref_ttinfo(&(tzrule->dst));
1947 }
1948
1949 if (tzrule->start != NULL) {
1950 PyMem_Free(tzrule->start);
1951 }
1952
1953 if (tzrule->end != NULL) {
1954 PyMem_Free(tzrule->end);
1955 }
1956}
1957
1958/* Calculate DST offsets from transitions and UTC offsets
1959 *
1960 * This is necessary because each C `ttinfo` only contains the UTC offset,
1961 * time zone abbreviation and an isdst boolean - it does not include the
1962 * amount of the DST offset, but we need the amount for the dst() function.
1963 *
1964 * Thus function uses heuristics to infer what the offset should be, so it
1965 * is not guaranteed that this will work for all zones. If we cannot assign
1966 * a value for a given DST offset, we'll assume it's 1H rather than 0H, so
1967 * bool(dt.dst()) will always match ttinfo.isdst.
1968 */
1969static void
1970utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
1971 unsigned char *isdsts, size_t num_transitions,
1972 size_t num_ttinfos)
1973{
1974 size_t dst_count = 0;
1975 size_t dst_found = 0;
1976 for (size_t i = 0; i < num_ttinfos; ++i) {
1977 dst_count++;
1978 }
1979
1980 for (size_t i = 1; i < num_transitions; ++i) {
1981 if (dst_count == dst_found) {
1982 break;
1983 }
1984
1985 size_t idx = trans_idx[i];
1986 size_t comp_idx = trans_idx[i - 1];
1987
1988 // Only look at DST offsets that have nto been assigned already
1989 if (!isdsts[idx] || dstoffs[idx] != 0) {
1990 continue;
1991 }
1992
1993 long dstoff = 0;
1994 long utcoff = utcoffs[idx];
1995
1996 if (!isdsts[comp_idx]) {
1997 dstoff = utcoff - utcoffs[comp_idx];
1998 }
1999
2000 if (!dstoff && idx < (num_ttinfos - 1)) {
2001 comp_idx = trans_idx[i + 1];
2002
2003 // If the following transition is also DST and we couldn't find
2004 // the DST offset by this point, we're going to have to skip it
2005 // and hope this transition gets assigned later
2006 if (isdsts[comp_idx]) {
2007 continue;
2008 }
2009
2010 dstoff = utcoff - utcoffs[comp_idx];
2011 }
2012
2013 if (dstoff) {
2014 dst_found++;
2015 dstoffs[idx] = dstoff;
2016 }
2017 }
2018
2019 if (dst_found < dst_count) {
2020 // If there are time zones we didn't find a value for, we'll end up
2021 // with dstoff = 0 for something where isdst=1. This is obviously
2022 // wrong — one hour will be a much better guess than 0.
2023 for (size_t idx = 0; idx < num_ttinfos; ++idx) {
2024 if (isdsts[idx] && !dstoffs[idx]) {
2025 dstoffs[idx] = 3600;
2026 }
2027 }
2028 }
2029}
2030
2031#define _swap(x, y, buffer) \
2032 buffer = x; \
2033 x = y; \
2034 y = buffer;
2035
2036/* Calculate transitions in local time from UTC time and offsets.
2037 *
2038 * We want to know when each transition occurs, denominated in the number of
2039 * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in
2040 * *local time* (note: this is *not* equivalent to the output of
2041 * datetime.timestamp, which is the total number of seconds actual elapsed
2042 * since 1970-01-01T00:00:00Z in UTC).
2043 *
2044 * This is an ambiguous question because "local time" can be ambiguous — but it
2045 * is disambiguated by the `fold` parameter, so we allocate two arrays:
2046 *
2047 * trans_local[0]: The wall-time transitions for fold=0
2048 * trans_local[1]: The wall-time transitions for fold=1
2049 *
2050 * This returns 0 on success and a negative number of failure. The trans_local
2051 * arrays must be freed if they are not NULL.
2052 */
2053static int
2054ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
2055 int64_t *trans_local[2], size_t num_ttinfos,
2056 size_t num_transitions)
2057{
2058 if (num_transitions == 0) {
2059 return 0;
2060 }
2061
2062 // Copy the UTC transitions into each array to be modified in place later
2063 for (size_t i = 0; i < 2; ++i) {
2064 trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t));
2065 if (trans_local[i] == NULL) {
2066 return -1;
2067 }
2068
2069 memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t));
2070 }
2071
2072 int64_t offset_0, offset_1, buff;
2073 if (num_ttinfos > 1) {
2074 offset_0 = utcoff[0];
2075 offset_1 = utcoff[trans_idx[0]];
2076
2077 if (offset_1 > offset_0) {
2078 _swap(offset_0, offset_1, buff);
2079 }
2080 }
2081 else {
2082 offset_0 = utcoff[0];
2083 offset_1 = utcoff[0];
2084 }
2085
2086 trans_local[0][0] += offset_0;
2087 trans_local[1][0] += offset_1;
2088
2089 for (size_t i = 1; i < num_transitions; ++i) {
2090 offset_0 = utcoff[trans_idx[i - 1]];
2091 offset_1 = utcoff[trans_idx[i]];
2092
2093 if (offset_1 > offset_0) {
2094 _swap(offset_1, offset_0, buff);
2095 }
2096
2097 trans_local[0][i] += offset_0;
2098 trans_local[1][i] += offset_1;
2099 }
2100
2101 return 0;
2102}
2103
2104/* Simple bisect_right binary search implementation */
2105static size_t
2106_bisect(const int64_t value, const int64_t *arr, size_t size)
2107{
2108 size_t lo = 0;
2109 size_t hi = size;
2110 size_t m;
2111
2112 while (lo < hi) {
2113 m = (lo + hi) / 2;
2114 if (arr[m] > value) {
2115 hi = m;
2116 }
2117 else {
2118 lo = m + 1;
2119 }
2120 }
2121
2122 return hi;
2123}
2124
2125/* Find the ttinfo rules that apply at a given local datetime. */
2126static _ttinfo *
2127find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt)
2128{
2129 // datetime.time has a .tzinfo attribute that passes None as the dt
2130 // argument; it only really has meaning for fixed-offset zones.
2131 if (dt == Py_None) {
2132 if (self->fixed_offset) {
2133 return &(self->tzrule_after.std);
2134 }
2135 else {
2136 return &NO_TTINFO;
2137 }
2138 }
2139
2140 int64_t ts;
2141 if (get_local_timestamp(dt, &ts)) {
2142 return NULL;
2143 }
2144
2145 unsigned char fold = PyDateTime_DATE_GET_FOLD(dt);
2146 assert(fold < 2);
2147 int64_t *local_transitions = self->trans_list_wall[fold];
2148 size_t num_trans = self->num_transitions;
2149
2150 if (num_trans && ts < local_transitions[0]) {
2151 return self->ttinfo_before;
2152 }
2153 else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) {
2154 return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold,
2155 PyDateTime_GET_YEAR(dt));
2156 }
2157 else {
2158 size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1;
2159 assert(idx < self->num_transitions);
2160 return self->trans_ttinfos[idx];
2161 }
2162}
2163
2164static int
2165is_leap_year(int year)
2166{
2167 const unsigned int ayear = (unsigned int)year;
2168 return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0);
2169}
2170
2171/* Calculates ordinal datetime from year, month and day. */
2172static int
2173ymd_to_ord(int y, int m, int d)
2174{
2175 y -= 1;
2176 int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400);
2177 int yearday = DAYS_BEFORE_MONTH[m];
2178 if (m > 2 && is_leap_year(y + 1)) {
2179 yearday += 1;
2180 }
2181
2182 return days_before_year + yearday + d;
2183}
2184
2185/* Calculate the number of seconds since 1970-01-01 in local time.
2186 *
2187 * This gets a datetime in the same "units" as self->trans_list_wall so that we
2188 * can easily determine which transitions a datetime falls between. See the
2189 * comment above ts_to_local for more information.
2190 * */
2191static int
2192get_local_timestamp(PyObject *dt, int64_t *local_ts)
2193{
2194 assert(local_ts != NULL);
2195
2196 int hour, minute, second;
2197 int ord;
2198 if (PyDateTime_CheckExact(dt)) {
2199 int y = PyDateTime_GET_YEAR(dt);
2200 int m = PyDateTime_GET_MONTH(dt);
2201 int d = PyDateTime_GET_DAY(dt);
2202 hour = PyDateTime_DATE_GET_HOUR(dt);
2203 minute = PyDateTime_DATE_GET_MINUTE(dt);
2204 second = PyDateTime_DATE_GET_SECOND(dt);
2205
2206 ord = ymd_to_ord(y, m, d);
2207 }
2208 else {
2209 PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL);
2210 if (num == NULL) {
2211 return -1;
2212 }
2213
2214 ord = PyLong_AsLong(num);
2215 Py_DECREF(num);
2216 if (ord == -1 && PyErr_Occurred()) {
2217 return -1;
2218 }
2219
2220 num = PyObject_GetAttrString(dt, "hour");
2221 if (num == NULL) {
2222 return -1;
2223 }
2224 hour = PyLong_AsLong(num);
2225 Py_DECREF(num);
2226 if (hour == -1) {
2227 return -1;
2228 }
2229
2230 num = PyObject_GetAttrString(dt, "minute");
2231 if (num == NULL) {
2232 return -1;
2233 }
2234 minute = PyLong_AsLong(num);
2235 Py_DECREF(num);
2236 if (minute == -1) {
2237 return -1;
2238 }
2239
2240 num = PyObject_GetAttrString(dt, "second");
2241 if (num == NULL) {
2242 return -1;
2243 }
2244 second = PyLong_AsLong(num);
2245 Py_DECREF(num);
2246 if (second == -1) {
2247 return -1;
2248 }
2249 }
2250
2251 *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 +
2252 (int64_t)(hour * 3600 + minute * 60 + second);
2253
2254 return 0;
2255}
2256
2257/////
2258// Functions for cache handling
2259
2260/* Constructor for StrongCacheNode */
2261static StrongCacheNode *
2262strong_cache_node_new(PyObject *key, PyObject *zone)
2263{
2264 StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode));
2265 if (node == NULL) {
2266 return NULL;
2267 }
2268
2269 Py_INCREF(key);
2270 Py_INCREF(zone);
2271
2272 node->next = NULL;
2273 node->prev = NULL;
2274 node->key = key;
2275 node->zone = zone;
2276
2277 return node;
2278}
2279
2280/* Destructor for StrongCacheNode */
2281void
2282strong_cache_node_free(StrongCacheNode *node)
2283{
2284 Py_XDECREF(node->key);
2285 Py_XDECREF(node->zone);
2286
2287 PyMem_Free(node);
2288}
2289
2290/* Frees all nodes at or after a specified root in the strong cache.
2291 *
2292 * This can be used on the root node to free the entire cache or it can be used
2293 * to clear all nodes that have been expired (which, if everything is going
2294 * right, will actually only be 1 node at a time).
2295 */
2296void
2297strong_cache_free(StrongCacheNode *root)
2298{
2299 StrongCacheNode *node = root;
2300 StrongCacheNode *next_node;
2301 while (node != NULL) {
2302 next_node = node->next;
2303 strong_cache_node_free(node);
2304
2305 node = next_node;
2306 }
2307}
2308
2309/* Removes a node from the cache and update its neighbors.
2310 *
2311 * This is used both when ejecting a node from the cache and when moving it to
2312 * the front of the cache.
2313 */
2314static void
2315remove_from_strong_cache(StrongCacheNode *node)
2316{
2317 if (ZONEINFO_STRONG_CACHE == node) {
2318 ZONEINFO_STRONG_CACHE = node->next;
2319 }
2320
2321 if (node->prev != NULL) {
2322 node->prev->next = node->next;
2323 }
2324
2325 if (node->next != NULL) {
2326 node->next->prev = node->prev;
2327 }
2328
2329 node->next = NULL;
2330 node->prev = NULL;
2331}
2332
2333/* Retrieves the node associated with a key, if it exists.
2334 *
2335 * This traverses the strong cache until it finds a matching key and returns a
2336 * pointer to the relevant node if found. Returns NULL if no node is found.
2337 *
2338 * root may be NULL, indicating an empty cache.
2339 */
2340static StrongCacheNode *
2341find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key)
2342{
2343 const StrongCacheNode *node = root;
2344 while (node != NULL) {
2345 if (PyObject_RichCompareBool(key, node->key, Py_EQ)) {
2346 return (StrongCacheNode *)node;
2347 }
2348
2349 node = node->next;
2350 }
2351
2352 return NULL;
2353}
2354
2355/* Ejects a given key from the class's strong cache, if applicable.
2356 *
2357 * This function is used to enable the per-key functionality in clear_cache.
2358 */
2359static void
2360eject_from_strong_cache(const PyTypeObject *const type, PyObject *key)
2361{
2362 if (type != &PyZoneInfo_ZoneInfoType) {
2363 return;
2364 }
2365
2366 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2367 if (node != NULL) {
2368 remove_from_strong_cache(node);
2369
2370 strong_cache_node_free(node);
2371 }
2372}
2373
2374/* Moves a node to the front of the LRU cache.
2375 *
2376 * The strong cache is an LRU cache, so whenever a given node is accessed, if
2377 * it is not at the front of the cache, it needs to be moved there.
2378 */
2379static void
2380move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node)
2381{
2382 StrongCacheNode *root_p = *root;
2383 if (root_p == node) {
2384 return;
2385 }
2386
2387 remove_from_strong_cache(node);
2388
2389 node->prev = NULL;
2390 node->next = root_p;
2391
2392 if (root_p != NULL) {
2393 root_p->prev = node;
2394 }
2395
2396 *root = node;
2397}
2398
2399/* Retrieves a ZoneInfo from the strong cache if it's present.
2400 *
2401 * This function finds the ZoneInfo by key and if found will move the node to
2402 * the front of the LRU cache and return a new reference to it. It returns NULL
2403 * if the key is not in the cache.
2404 *
2405 * The strong cache is currently only implemented for the base class, so this
2406 * always returns a cache miss for subclasses.
2407 */
2408static PyObject *
2409zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key)
2410{
2411 if (type != &PyZoneInfo_ZoneInfoType) {
2412 return NULL; // Strong cache currently only implemented for base class
2413 }
2414
2415 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2416
2417 if (node != NULL) {
2418 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node);
2419 Py_INCREF(node->zone);
2420 return node->zone;
2421 }
2422
2423 return NULL; // Cache miss
2424}
2425
2426/* Inserts a new key into the strong LRU cache.
2427 *
2428 * This function is only to be used after a cache miss — it creates a new node
2429 * at the front of the cache and ejects any stale entries (keeping the size of
2430 * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE).
2431 */
2432static void
2433update_strong_cache(const PyTypeObject *const type, PyObject *key,
2434 PyObject *zone)
2435{
2436 if (type != &PyZoneInfo_ZoneInfoType) {
2437 return;
2438 }
2439
2440 StrongCacheNode *new_node = strong_cache_node_new(key, zone);
2441
2442 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node);
2443
2444 StrongCacheNode *node = new_node->next;
2445 for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) {
2446 if (node == NULL) {
2447 return;
2448 }
2449 node = node->next;
2450 }
2451
2452 // Everything beyond this point needs to be freed
2453 if (node != NULL) {
2454 if (node->prev != NULL) {
2455 node->prev->next = NULL;
2456 }
2457 strong_cache_free(node);
2458 }
2459}
2460
2461/* Clears all entries into a type's strong cache.
2462 *
2463 * Because the strong cache is not implemented for subclasses, this is a no-op
2464 * for everything except the base class.
2465 */
2466void
2467clear_strong_cache(const PyTypeObject *const type)
2468{
2469 if (type != &PyZoneInfo_ZoneInfoType) {
2470 return;
2471 }
2472
2473 strong_cache_free(ZONEINFO_STRONG_CACHE);
2474}
2475
2476static PyObject *
Benjamin Peterson0108b2a2020-07-15 10:02:14 -07002477new_weak_cache(void)
Paul Ganssle62972d92020-05-16 04:20:06 -04002478{
2479 PyObject *weakref_module = PyImport_ImportModule("weakref");
2480 if (weakref_module == NULL) {
2481 return NULL;
2482 }
2483
2484 PyObject *weak_cache =
2485 PyObject_CallMethod(weakref_module, "WeakValueDictionary", "");
2486 Py_DECREF(weakref_module);
2487 return weak_cache;
2488}
2489
2490static int
Benjamin Peterson0108b2a2020-07-15 10:02:14 -07002491initialize_caches(void)
Paul Ganssle62972d92020-05-16 04:20:06 -04002492{
Ammar Askar06a1b892020-05-22 16:10:55 +00002493 // TODO: Move to a PyModule_GetState / PEP 573 based caching system.
Paul Ganssle62972d92020-05-16 04:20:06 -04002494 if (TIMEDELTA_CACHE == NULL) {
2495 TIMEDELTA_CACHE = PyDict_New();
2496 }
2497 else {
2498 Py_INCREF(TIMEDELTA_CACHE);
2499 }
2500
2501 if (TIMEDELTA_CACHE == NULL) {
2502 return -1;
2503 }
2504
2505 if (ZONEINFO_WEAK_CACHE == NULL) {
2506 ZONEINFO_WEAK_CACHE = new_weak_cache();
2507 }
2508 else {
2509 Py_INCREF(ZONEINFO_WEAK_CACHE);
2510 }
2511
2512 if (ZONEINFO_WEAK_CACHE == NULL) {
2513 return -1;
2514 }
2515
2516 return 0;
2517}
2518
2519static PyObject *
2520zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs)
2521{
2522 PyObject *weak_cache = new_weak_cache();
2523 if (weak_cache == NULL) {
2524 return NULL;
2525 }
2526
2527 PyObject_SetAttrString((PyObject *)cls, "_weak_cache", weak_cache);
2528 Py_RETURN_NONE;
2529}
2530
2531/////
2532// Specify the ZoneInfo type
2533static PyMethodDef zoneinfo_methods[] = {
2534 {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache,
2535 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2536 PyDoc_STR("Clear the ZoneInfo cache.")},
2537 {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache,
2538 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2539 PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")},
2540 {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file,
2541 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2542 PyDoc_STR("Create a ZoneInfo file from a file object.")},
2543 {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O,
2544 PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at "
2545 "the given datetime.")},
2546 {"dst", (PyCFunction)zoneinfo_dst, METH_O,
2547 PyDoc_STR("Retrieve a timedelta representing the amount of DST applied "
2548 "in a zone at the given datetime.")},
2549 {"tzname", (PyCFunction)zoneinfo_tzname, METH_O,
2550 PyDoc_STR("Retrieve a string containing the abbreviation for the time "
2551 "zone that applies in a zone at a given datetime.")},
2552 {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O,
2553 PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted "
2554 "datetime in local time.")},
2555 {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS,
2556 PyDoc_STR("Function for serialization with the pickle protocol.")},
2557 {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS,
2558 PyDoc_STR("Private method used in unpickling.")},
2559 {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass,
2560 METH_VARARGS | METH_KEYWORDS,
2561 PyDoc_STR("Function to initialize subclasses.")},
2562 {NULL} /* Sentinel */
2563};
2564
2565static PyMemberDef zoneinfo_members[] = {
2566 {.name = "key",
2567 .offset = offsetof(PyZoneInfo_ZoneInfo, key),
2568 .type = T_OBJECT_EX,
2569 .flags = READONLY,
2570 .doc = NULL},
2571 {NULL}, /* Sentinel */
2572};
2573
2574static PyTypeObject PyZoneInfo_ZoneInfoType = {
2575 PyVarObject_HEAD_INIT(NULL, 0) //
2576 .tp_name = "zoneinfo.ZoneInfo",
2577 .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo),
2578 .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist),
2579 .tp_repr = (reprfunc)zoneinfo_repr,
2580 .tp_str = (reprfunc)zoneinfo_str,
2581 .tp_getattro = PyObject_GenericGetAttr,
2582 .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE),
2583 /* .tp_doc = zoneinfo_doc, */
2584 .tp_methods = zoneinfo_methods,
2585 .tp_members = zoneinfo_members,
2586 .tp_new = zoneinfo_new,
2587 .tp_dealloc = zoneinfo_dealloc,
2588};
2589
2590/////
2591// Specify the _zoneinfo module
2592static PyMethodDef module_methods[] = {{NULL, NULL}};
2593static void
2594module_free()
2595{
2596 Py_XDECREF(_tzpath_find_tzfile);
2597 _tzpath_find_tzfile = NULL;
2598
2599 Py_XDECREF(_common_mod);
2600 _common_mod = NULL;
2601
2602 Py_XDECREF(io_open);
2603 io_open = NULL;
2604
2605 xdecref_ttinfo(&NO_TTINFO);
2606
Ammar Askar06a1b892020-05-22 16:10:55 +00002607 if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) {
2608 Py_DECREF(TIMEDELTA_CACHE);
2609 } else {
2610 Py_CLEAR(TIMEDELTA_CACHE);
Paul Ganssle62972d92020-05-16 04:20:06 -04002611 }
2612
Ammar Askar06a1b892020-05-22 16:10:55 +00002613 if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) {
2614 Py_DECREF(ZONEINFO_WEAK_CACHE);
2615 } else {
2616 Py_CLEAR(ZONEINFO_WEAK_CACHE);
Paul Ganssle62972d92020-05-16 04:20:06 -04002617 }
2618
2619 strong_cache_free(ZONEINFO_STRONG_CACHE);
2620 ZONEINFO_STRONG_CACHE = NULL;
2621}
2622
2623static int
2624zoneinfomodule_exec(PyObject *m)
2625{
2626 PyDateTime_IMPORT;
2627 PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType;
2628 if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) {
2629 goto error;
2630 }
2631
2632 Py_INCREF(&PyZoneInfo_ZoneInfoType);
2633 PyModule_AddObject(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType);
2634
2635 /* Populate imports */
2636 PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath");
2637 if (_tzpath_module == NULL) {
2638 goto error;
2639 }
2640
2641 _tzpath_find_tzfile =
2642 PyObject_GetAttrString(_tzpath_module, "find_tzfile");
2643 Py_DECREF(_tzpath_module);
2644 if (_tzpath_find_tzfile == NULL) {
2645 goto error;
2646 }
2647
2648 PyObject *io_module = PyImport_ImportModule("io");
2649 if (io_module == NULL) {
2650 goto error;
2651 }
2652
2653 io_open = PyObject_GetAttrString(io_module, "open");
2654 Py_DECREF(io_module);
2655 if (io_open == NULL) {
2656 goto error;
2657 }
2658
2659 _common_mod = PyImport_ImportModule("zoneinfo._common");
2660 if (_common_mod == NULL) {
2661 goto error;
2662 }
2663
2664 if (NO_TTINFO.utcoff == NULL) {
2665 NO_TTINFO.utcoff = Py_None;
2666 NO_TTINFO.dstoff = Py_None;
2667 NO_TTINFO.tzname = Py_None;
2668
2669 for (size_t i = 0; i < 3; ++i) {
2670 Py_INCREF(Py_None);
2671 }
2672 }
2673
2674 if (initialize_caches()) {
2675 goto error;
2676 }
2677
2678 return 0;
2679
2680error:
2681 return -1;
2682}
2683
2684static PyModuleDef_Slot zoneinfomodule_slots[] = {
2685 {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}};
2686
2687static struct PyModuleDef zoneinfomodule = {
2688 PyModuleDef_HEAD_INIT,
2689 .m_name = "_zoneinfo",
2690 .m_doc = "C implementation of the zoneinfo module",
2691 .m_size = 0,
2692 .m_methods = module_methods,
2693 .m_slots = zoneinfomodule_slots,
2694 .m_free = (freefunc)module_free};
2695
2696PyMODINIT_FUNC
2697PyInit__zoneinfo(void)
2698{
2699 return PyModuleDef_Init(&zoneinfomodule);
2700}