blob: e8b28319993a18d4a350287320548bee249dd720 [file] [log] [blame]
Paul Ganssle62972d92020-05-16 04:20:06 -04001#include "Python.h"
2#include "structmember.h"
3
4#include <ctype.h>
5#include <stddef.h>
6#include <stdint.h>
7
8#include "datetime.h"
9
10// Imports
11static PyObject *io_open = NULL;
12static PyObject *_tzpath_find_tzfile = NULL;
13static PyObject *_common_mod = NULL;
14
15typedef struct TransitionRuleType TransitionRuleType;
16typedef struct StrongCacheNode StrongCacheNode;
17
18typedef struct {
19 PyObject *utcoff;
20 PyObject *dstoff;
21 PyObject *tzname;
22 long utcoff_seconds;
23} _ttinfo;
24
25typedef struct {
26 _ttinfo std;
27 _ttinfo dst;
28 int dst_diff;
29 TransitionRuleType *start;
30 TransitionRuleType *end;
31 unsigned char std_only;
32} _tzrule;
33
34typedef struct {
35 PyDateTime_TZInfo base;
36 PyObject *key;
37 PyObject *file_repr;
38 PyObject *weakreflist;
Pablo Galindoe4799b92020-05-27 21:48:12 +010039 size_t num_transitions;
40 size_t num_ttinfos;
Paul Ganssle62972d92020-05-16 04:20:06 -040041 int64_t *trans_list_utc;
42 int64_t *trans_list_wall[2];
43 _ttinfo **trans_ttinfos; // References to the ttinfo for each transition
44 _ttinfo *ttinfo_before;
45 _tzrule tzrule_after;
46 _ttinfo *_ttinfos; // Unique array of ttinfos for ease of deallocation
47 unsigned char fixed_offset;
48 unsigned char source;
49} PyZoneInfo_ZoneInfo;
50
51struct TransitionRuleType {
52 int64_t (*year_to_timestamp)(TransitionRuleType *, int);
53};
54
55typedef struct {
56 TransitionRuleType base;
57 uint8_t month;
58 uint8_t week;
59 uint8_t day;
60 int8_t hour;
61 int8_t minute;
62 int8_t second;
63} CalendarRule;
64
65typedef struct {
66 TransitionRuleType base;
67 uint8_t julian;
68 unsigned int day;
69 int8_t hour;
70 int8_t minute;
71 int8_t second;
72} DayRule;
73
74struct StrongCacheNode {
75 StrongCacheNode *next;
76 StrongCacheNode *prev;
77 PyObject *key;
78 PyObject *zone;
79};
80
81static PyTypeObject PyZoneInfo_ZoneInfoType;
82
83// Globals
84static PyObject *TIMEDELTA_CACHE = NULL;
85static PyObject *ZONEINFO_WEAK_CACHE = NULL;
86static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL;
87static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8;
88
89static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0};
90
91// Constants
92static const int EPOCHORDINAL = 719163;
93static int DAYS_IN_MONTH[] = {
94 -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
95};
96
97static int DAYS_BEFORE_MONTH[] = {
98 -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
99};
100
101static const int SOURCE_NOCACHE = 0;
102static const int SOURCE_CACHE = 1;
103static const int SOURCE_FILE = 2;
104
105// Forward declarations
106static int
107load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj);
108static void
109utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
110 unsigned char *isdsts, size_t num_transitions,
111 size_t num_ttinfos);
112static int
113ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
114 int64_t *trans_local[2], size_t num_ttinfos,
115 size_t num_transitions);
116
117static int
118parse_tz_str(PyObject *tz_str_obj, _tzrule *out);
119
Pablo Galindoe4799b92020-05-27 21:48:12 +0100120static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400121parse_abbr(const char *const p, PyObject **abbr);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100122static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400123parse_tz_delta(const char *const p, long *total_seconds);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100124static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400125parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
126 int8_t *second);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100127static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -0400128parse_transition_rule(const char *const p, TransitionRuleType **out);
129
130static _ttinfo *
131find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year);
132static _ttinfo *
133find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
134 unsigned char *fold);
135
136static int
137build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out);
138static void
139xdecref_ttinfo(_ttinfo *ttinfo);
140static int
141ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1);
142
143static int
144build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
145 long dst_offset, TransitionRuleType *start,
146 TransitionRuleType *end, _tzrule *out);
147static void
148free_tzrule(_tzrule *tzrule);
149
150static PyObject *
151load_timedelta(long seconds);
152
153static int
154get_local_timestamp(PyObject *dt, int64_t *local_ts);
155static _ttinfo *
156find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt);
157
158static int
159ymd_to_ord(int y, int m, int d);
160static int
161is_leap_year(int year);
162
163static size_t
164_bisect(const int64_t value, const int64_t *arr, size_t size);
165
166static void
167eject_from_strong_cache(const PyTypeObject *const type, PyObject *key);
168static void
169clear_strong_cache(const PyTypeObject *const type);
170static void
171update_strong_cache(const PyTypeObject *const type, PyObject *key,
172 PyObject *zone);
173static PyObject *
174zone_from_strong_cache(const PyTypeObject *const type, PyObject *key);
175
176static PyObject *
177zoneinfo_new_instance(PyTypeObject *type, PyObject *key)
178{
179 PyObject *file_obj = NULL;
180 PyObject *file_path = NULL;
181
182 file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL);
183 if (file_path == NULL) {
184 return NULL;
185 }
186 else if (file_path == Py_None) {
187 file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key);
188 if (file_obj == NULL) {
189 Py_DECREF(file_path);
190 return NULL;
191 }
192 }
193
194 PyObject *self = (PyObject *)(type->tp_alloc(type, 0));
195 if (self == NULL) {
196 goto error;
197 }
198
199 if (file_obj == NULL) {
200 file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb");
201 if (file_obj == NULL) {
202 goto error;
203 }
204 }
205
206 if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) {
207 goto error;
208 }
209
210 PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL);
211 Py_DECREF(file_obj);
212 file_obj = NULL;
213 if (rv == NULL) {
214 goto error;
215 }
216 Py_DECREF(rv);
217
218 ((PyZoneInfo_ZoneInfo *)self)->key = key;
219 Py_INCREF(key);
220
221 goto cleanup;
222error:
223 Py_XDECREF(self);
224 self = NULL;
225cleanup:
226 if (file_obj != NULL) {
227 PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL);
228 Py_DECREF(tmp);
229 Py_DECREF(file_obj);
230 }
231 Py_DECREF(file_path);
232 return self;
233}
234
235static PyObject *
236get_weak_cache(PyTypeObject *type)
237{
238 if (type == &PyZoneInfo_ZoneInfoType) {
239 return ZONEINFO_WEAK_CACHE;
240 }
241 else {
242 PyObject *cache =
243 PyObject_GetAttrString((PyObject *)type, "_weak_cache");
244 // We are assuming that the type lives at least as long as the function
245 // that calls get_weak_cache, and that it holds a reference to the
246 // cache, so we'll return a "borrowed reference".
247 Py_XDECREF(cache);
248 return cache;
249 }
250}
251
252static PyObject *
253zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw)
254{
255 PyObject *key = NULL;
256 static char *kwlist[] = {"key", NULL};
257 if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) {
258 return NULL;
259 }
260
261 PyObject *instance = zone_from_strong_cache(type, key);
262 if (instance != NULL) {
263 return instance;
264 }
265
266 PyObject *weak_cache = get_weak_cache(type);
267 instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None);
268 if (instance == NULL) {
269 return NULL;
270 }
271
272 if (instance == Py_None) {
273 Py_DECREF(instance);
274 PyObject *tmp = zoneinfo_new_instance(type, key);
275 if (tmp == NULL) {
276 return NULL;
277 }
278
279 instance =
280 PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp);
281 ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE;
282
283 Py_DECREF(tmp);
284
285 if (instance == NULL) {
286 return NULL;
287 }
288 }
289
290 update_strong_cache(type, key, instance);
291 return instance;
292}
293
294static void
295zoneinfo_dealloc(PyObject *obj_self)
296{
297 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
298
299 if (self->weakreflist != NULL) {
300 PyObject_ClearWeakRefs(obj_self);
301 }
302
303 if (self->trans_list_utc != NULL) {
304 PyMem_Free(self->trans_list_utc);
305 }
306
307 for (size_t i = 0; i < 2; i++) {
308 if (self->trans_list_wall[i] != NULL) {
309 PyMem_Free(self->trans_list_wall[i]);
310 }
311 }
312
313 if (self->_ttinfos != NULL) {
314 for (size_t i = 0; i < self->num_ttinfos; ++i) {
315 xdecref_ttinfo(&(self->_ttinfos[i]));
316 }
317 PyMem_Free(self->_ttinfos);
318 }
319
320 if (self->trans_ttinfos != NULL) {
321 PyMem_Free(self->trans_ttinfos);
322 }
323
324 free_tzrule(&(self->tzrule_after));
325
326 Py_XDECREF(self->key);
327 Py_XDECREF(self->file_repr);
328
329 Py_TYPE(self)->tp_free((PyObject *)self);
330}
331
332static PyObject *
333zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs)
334{
335 PyObject *file_obj = NULL;
336 PyObject *file_repr = NULL;
337 PyObject *key = Py_None;
338 PyZoneInfo_ZoneInfo *self = NULL;
339
340 static char *kwlist[] = {"", "key", NULL};
341 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj,
342 &key)) {
343 return NULL;
344 }
345
346 PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0));
347 self = (PyZoneInfo_ZoneInfo *)obj_self;
348 if (self == NULL) {
349 return NULL;
350 }
351
352 file_repr = PyUnicode_FromFormat("%R", file_obj);
353 if (file_repr == NULL) {
354 goto error;
355 }
356
357 if (load_data(self, file_obj)) {
358 goto error;
359 }
360
361 self->source = SOURCE_FILE;
362 self->file_repr = file_repr;
363 self->key = key;
364 Py_INCREF(key);
365
366 return obj_self;
367error:
368 Py_XDECREF(file_repr);
369 Py_XDECREF(self);
370 return NULL;
371}
372
373static PyObject *
374zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
375{
376 static char *kwlist[] = {"key", NULL};
377 PyObject *key = NULL;
378 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) {
379 return NULL;
380 }
381
382 PyObject *out = zoneinfo_new_instance(cls, key);
383 if (out != NULL) {
384 ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE;
385 }
386
387 return out;
388}
389
390static PyObject *
391zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs)
392{
393 PyObject *only_keys = NULL;
394 static char *kwlist[] = {"only_keys", NULL};
395
396 if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist,
397 &only_keys))) {
398 return NULL;
399 }
400
401 PyTypeObject *type = (PyTypeObject *)cls;
402 PyObject *weak_cache = get_weak_cache(type);
403
404 if (only_keys == NULL || only_keys == Py_None) {
405 PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL);
406 if (rv != NULL) {
407 Py_DECREF(rv);
408 }
409
410 clear_strong_cache(type);
411 ZONEINFO_STRONG_CACHE = NULL;
412 }
413 else {
414 PyObject *item = NULL;
415 PyObject *pop = PyUnicode_FromString("pop");
416 if (pop == NULL) {
417 return NULL;
418 }
419
420 PyObject *iter = PyObject_GetIter(only_keys);
421 if (iter == NULL) {
422 Py_DECREF(pop);
423 return NULL;
424 }
425
426 while ((item = PyIter_Next(iter))) {
427 // Remove from strong cache
428 eject_from_strong_cache(type, item);
429
430 // Remove from weak cache
431 PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item,
432 Py_None, NULL);
433
434 Py_DECREF(item);
435 if (tmp == NULL) {
436 break;
437 }
438 Py_DECREF(tmp);
439 }
440 Py_DECREF(iter);
441 Py_DECREF(pop);
442 }
443
444 if (PyErr_Occurred()) {
445 return NULL;
446 }
447
448 Py_RETURN_NONE;
449}
450
451static PyObject *
452zoneinfo_utcoffset(PyObject *self, PyObject *dt)
453{
454 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
455 if (tti == NULL) {
456 return NULL;
457 }
458 Py_INCREF(tti->utcoff);
459 return tti->utcoff;
460}
461
462static PyObject *
463zoneinfo_dst(PyObject *self, PyObject *dt)
464{
465 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
466 if (tti == NULL) {
467 return NULL;
468 }
469 Py_INCREF(tti->dstoff);
470 return tti->dstoff;
471}
472
473static PyObject *
474zoneinfo_tzname(PyObject *self, PyObject *dt)
475{
476 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
477 if (tti == NULL) {
478 return NULL;
479 }
480 Py_INCREF(tti->tzname);
481 return tti->tzname;
482}
483
484#define HASTZINFO(p) (((_PyDateTime_BaseTZInfo *)(p))->hastzinfo)
485#define GET_DT_TZINFO(p) \
486 (HASTZINFO(p) ? ((PyDateTime_DateTime *)(p))->tzinfo : Py_None)
487
488static PyObject *
489zoneinfo_fromutc(PyObject *obj_self, PyObject *dt)
490{
491 if (!PyDateTime_Check(dt)) {
492 PyErr_SetString(PyExc_TypeError,
493 "fromutc: argument must be a datetime");
494 return NULL;
495 }
496 if (GET_DT_TZINFO(dt) != obj_self) {
497 PyErr_SetString(PyExc_ValueError,
498 "fromutc: dt.tzinfo "
499 "is not self");
500 return NULL;
501 }
502
503 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
504
505 int64_t timestamp;
506 if (get_local_timestamp(dt, &timestamp)) {
507 return NULL;
508 }
509 size_t num_trans = self->num_transitions;
510
511 _ttinfo *tti = NULL;
512 unsigned char fold = 0;
513
514 if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) {
515 tti = self->ttinfo_before;
516 }
517 else if (num_trans == 0 ||
518 timestamp > self->trans_list_utc[num_trans - 1]) {
519 tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp,
520 PyDateTime_GET_YEAR(dt), &fold);
521
522 // Immediately after the last manual transition, the fold/gap is
523 // between self->trans_ttinfos[num_transitions - 1] and whatever
524 // ttinfo applies immediately after the last transition, not between
525 // the STD and DST rules in the tzrule_after, so we may need to
526 // adjust the fold value.
527 if (num_trans) {
528 _ttinfo *tti_prev = NULL;
529 if (num_trans == 1) {
530 tti_prev = self->ttinfo_before;
531 }
532 else {
533 tti_prev = self->trans_ttinfos[num_trans - 2];
534 }
535 int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds;
536 if (diff > 0 &&
537 timestamp < (self->trans_list_utc[num_trans - 1] + diff)) {
538 fold = 1;
539 }
540 }
541 }
542 else {
543 size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans);
544 _ttinfo *tti_prev = NULL;
545
546 if (idx >= 2) {
547 tti_prev = self->trans_ttinfos[idx - 2];
548 tti = self->trans_ttinfos[idx - 1];
549 }
550 else {
551 tti_prev = self->ttinfo_before;
552 tti = self->trans_ttinfos[0];
553 }
554
555 // Detect fold
556 int64_t shift =
557 (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds);
558 if (shift > (timestamp - self->trans_list_utc[idx - 1])) {
559 fold = 1;
560 }
561 }
562
563 PyObject *tmp = PyNumber_Add(dt, tti->utcoff);
564 if (tmp == NULL) {
565 return NULL;
566 }
567
568 if (fold) {
569 if (PyDateTime_CheckExact(tmp)) {
570 ((PyDateTime_DateTime *)tmp)->fold = 1;
571 dt = tmp;
572 }
573 else {
574 PyObject *replace = PyObject_GetAttrString(tmp, "replace");
575 PyObject *args = PyTuple_New(0);
576 PyObject *kwargs = PyDict_New();
577
578 Py_DECREF(tmp);
579 if (args == NULL || kwargs == NULL || replace == NULL) {
580 Py_XDECREF(args);
581 Py_XDECREF(kwargs);
582 Py_XDECREF(replace);
583 return NULL;
584 }
585
586 dt = NULL;
587 if (!PyDict_SetItemString(kwargs, "fold", _PyLong_One)) {
588 dt = PyObject_Call(replace, args, kwargs);
589 }
590
591 Py_DECREF(args);
592 Py_DECREF(kwargs);
593 Py_DECREF(replace);
594
595 if (dt == NULL) {
596 return NULL;
597 }
598 }
599 }
600 else {
601 dt = tmp;
602 }
603 return dt;
604}
605
606static PyObject *
607zoneinfo_repr(PyZoneInfo_ZoneInfo *self)
608{
609 PyObject *rv = NULL;
610 const char *type_name = Py_TYPE((PyObject *)self)->tp_name;
611 if (!(self->key == Py_None)) {
612 rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key);
613 }
614 else {
615 assert(PyUnicode_Check(self->file_repr));
616 rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name,
617 self->file_repr);
618 }
619
620 return rv;
621}
622
623static PyObject *
624zoneinfo_str(PyZoneInfo_ZoneInfo *self)
625{
626 if (!(self->key == Py_None)) {
627 Py_INCREF(self->key);
628 return self->key;
629 }
630 else {
631 return zoneinfo_repr(self);
632 }
633}
634
635/* Pickles the ZoneInfo object by key and source.
636 *
637 * ZoneInfo objects are pickled by reference to the TZif file that they came
638 * from, which means that the exact transitions may be different or the file
639 * may not un-pickle if the data has changed on disk in the interim.
640 *
641 * It is necessary to include a bit indicating whether or not the object
642 * was constructed from the cache, because from-cache objects will hit the
643 * unpickling process's cache, whereas no-cache objects will bypass it.
644 *
645 * Objects constructed from ZoneInfo.from_file cannot be pickled.
646 */
647static PyObject *
648zoneinfo_reduce(PyObject *obj_self, PyObject *unused)
649{
650 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
651 if (self->source == SOURCE_FILE) {
652 // Objects constructed from files cannot be pickled.
653 PyObject *pickle = PyImport_ImportModule("pickle");
654 if (pickle == NULL) {
655 return NULL;
656 }
657
658 PyObject *pickle_error =
659 PyObject_GetAttrString(pickle, "PicklingError");
660 Py_DECREF(pickle);
661 if (pickle_error == NULL) {
662 return NULL;
663 }
664
665 PyErr_Format(pickle_error,
666 "Cannot pickle a ZoneInfo file from a file stream.");
667 Py_DECREF(pickle_error);
668 return NULL;
669 }
670
671 unsigned char from_cache = self->source == SOURCE_CACHE ? 1 : 0;
672 PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle");
673
674 if (constructor == NULL) {
675 return NULL;
676 }
677
678 PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache);
679 Py_DECREF(constructor);
680 return rv;
681}
682
683static PyObject *
684zoneinfo__unpickle(PyTypeObject *cls, PyObject *args)
685{
686 PyObject *key;
687 unsigned char from_cache;
688 if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) {
689 return NULL;
690 }
691
692 if (from_cache) {
693 PyObject *val_args = Py_BuildValue("(O)", key);
694 if (val_args == NULL) {
695 return NULL;
696 }
697
698 PyObject *rv = zoneinfo_new(cls, val_args, NULL);
699
700 Py_DECREF(val_args);
701 return rv;
702 }
703 else {
704 return zoneinfo_new_instance(cls, key);
705 }
706}
707
708/* It is relatively expensive to construct new timedelta objects, and in most
709 * cases we're looking at a relatively small number of timedeltas, such as
710 * integer number of hours, etc. We will keep a cache so that we construct
711 * a minimal number of these.
712 *
713 * Possibly this should be replaced with an LRU cache so that it's not possible
714 * for the memory usage to explode from this, but in order for this to be a
715 * serious problem, one would need to deliberately craft a malicious time zone
716 * file with many distinct offsets. As of tzdb 2019c, loading every single zone
717 * fills the cache with ~450 timedeltas for a total size of ~12kB.
718 *
719 * This returns a new reference to the timedelta.
720 */
721static PyObject *
722load_timedelta(long seconds)
723{
724 PyObject *rv = NULL;
725 PyObject *pyoffset = PyLong_FromLong(seconds);
726 if (pyoffset == NULL) {
727 return NULL;
728 }
729 int contains = PyDict_Contains(TIMEDELTA_CACHE, pyoffset);
730 if (contains == -1) {
731 goto error;
732 }
733
734 if (!contains) {
735 PyObject *tmp = PyDateTimeAPI->Delta_FromDelta(
736 0, seconds, 0, 1, PyDateTimeAPI->DeltaType);
737
738 if (tmp == NULL) {
739 goto error;
740 }
741
742 rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp);
743 Py_DECREF(tmp);
744 }
745 else {
746 rv = PyDict_GetItem(TIMEDELTA_CACHE, pyoffset);
747 }
748
749 Py_DECREF(pyoffset);
750 Py_INCREF(rv);
751 return rv;
752error:
753 Py_DECREF(pyoffset);
754 return NULL;
755}
756
757/* Constructor for _ttinfo object - this starts by initializing the _ttinfo
758 * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially
759 * initialized _ttinfo objects.
760 */
761static int
762build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out)
763{
764 out->utcoff = NULL;
765 out->dstoff = NULL;
766 out->tzname = NULL;
767
768 out->utcoff_seconds = utcoffset;
769 out->utcoff = load_timedelta(utcoffset);
770 if (out->utcoff == NULL) {
771 return -1;
772 }
773
774 out->dstoff = load_timedelta(dstoffset);
775 if (out->dstoff == NULL) {
776 return -1;
777 }
778
779 out->tzname = tzname;
780 Py_INCREF(tzname);
781
782 return 0;
783}
784
785/* Decrease reference count on any non-NULL members of a _ttinfo */
786static void
787xdecref_ttinfo(_ttinfo *ttinfo)
788{
789 if (ttinfo != NULL) {
790 Py_XDECREF(ttinfo->utcoff);
791 Py_XDECREF(ttinfo->dstoff);
792 Py_XDECREF(ttinfo->tzname);
793 }
794}
795
796/* Equality function for _ttinfo. */
797static int
798ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1)
799{
800 int rv;
801 if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) <
802 1) {
803 goto end;
804 }
805
806 if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) <
807 1) {
808 goto end;
809 }
810
811 if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) <
812 1) {
813 goto end;
814 }
815end:
816 return rv;
817}
818
819/* Given a file-like object, this populates a ZoneInfo object
820 *
821 * The current version calls into a Python function to read the data from
822 * file into Python objects, and this translates those Python objects into
823 * C values and calculates derived values (e.g. dstoff) in C.
824 *
825 * This returns 0 on success and -1 on failure.
826 *
827 * The function will never return while `self` is partially initialized —
828 * the object only needs to be freed / deallocated if this succeeds.
829 */
830static int
831load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj)
832{
833 PyObject *data_tuple = NULL;
834
835 long *utcoff = NULL;
836 long *dstoff = NULL;
837 size_t *trans_idx = NULL;
838 unsigned char *isdst = NULL;
839
840 self->trans_list_utc = NULL;
841 self->trans_list_wall[0] = NULL;
842 self->trans_list_wall[1] = NULL;
843 self->trans_ttinfos = NULL;
844 self->_ttinfos = NULL;
845 self->file_repr = NULL;
846
847 size_t ttinfos_allocated = 0;
848
849 data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj);
850
851 if (data_tuple == NULL) {
852 goto error;
853 }
854
855 if (!PyTuple_CheckExact(data_tuple)) {
856 PyErr_Format(PyExc_TypeError, "Invalid data result type: %r",
857 data_tuple);
858 goto error;
859 }
860
861 // Unpack the data tuple
862 PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0);
863 if (trans_idx_list == NULL) {
864 goto error;
865 }
866
867 PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1);
868 if (trans_utc == NULL) {
869 goto error;
870 }
871
872 PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2);
873 if (utcoff_list == NULL) {
874 goto error;
875 }
876
877 PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3);
878 if (isdst_list == NULL) {
879 goto error;
880 }
881
882 PyObject *abbr = PyTuple_GetItem(data_tuple, 4);
883 if (abbr == NULL) {
884 goto error;
885 }
886
887 PyObject *tz_str = PyTuple_GetItem(data_tuple, 5);
888 if (tz_str == NULL) {
889 goto error;
890 }
891
892 // Load the relevant sizes
893 Py_ssize_t num_transitions = PyTuple_Size(trans_utc);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100894 if (num_transitions < 0) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400895 goto error;
896 }
897
898 Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list);
Pablo Galindoe4799b92020-05-27 21:48:12 +0100899 if (num_ttinfos < 0) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400900 goto error;
901 }
902
903 self->num_transitions = (size_t)num_transitions;
904 self->num_ttinfos = (size_t)num_ttinfos;
905
906 // Load the transition indices and list
907 self->trans_list_utc =
908 PyMem_Malloc(self->num_transitions * sizeof(int64_t));
909 trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t));
910
Pablo Galindoe4799b92020-05-27 21:48:12 +0100911 for (size_t i = 0; i < self->num_transitions; ++i) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400912 PyObject *num = PyTuple_GetItem(trans_utc, i);
913 if (num == NULL) {
914 goto error;
915 }
916 self->trans_list_utc[i] = PyLong_AsLongLong(num);
917 if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) {
918 goto error;
919 }
920
921 num = PyTuple_GetItem(trans_idx_list, i);
922 if (num == NULL) {
923 goto error;
924 }
925
926 Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num);
927 if (cur_trans_idx == -1) {
928 goto error;
929 }
930
931 trans_idx[i] = (size_t)cur_trans_idx;
932 if (trans_idx[i] > self->num_ttinfos) {
933 PyErr_Format(
934 PyExc_ValueError,
935 "Invalid transition index found while reading TZif: %zd",
936 cur_trans_idx);
937
938 goto error;
939 }
940 }
941
942 // Load UTC offsets and isdst (size num_ttinfos)
943 utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long));
944 isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char));
945
946 if (utcoff == NULL || isdst == NULL) {
947 goto error;
948 }
Pablo Galindoe4799b92020-05-27 21:48:12 +0100949 for (size_t i = 0; i < self->num_ttinfos; ++i) {
Paul Ganssle62972d92020-05-16 04:20:06 -0400950 PyObject *num = PyTuple_GetItem(utcoff_list, i);
951 if (num == NULL) {
952 goto error;
953 }
954
955 utcoff[i] = PyLong_AsLong(num);
956 if (utcoff[i] == -1 && PyErr_Occurred()) {
957 goto error;
958 }
959
960 num = PyTuple_GetItem(isdst_list, i);
961 if (num == NULL) {
962 goto error;
963 }
964
965 int isdst_with_error = PyObject_IsTrue(num);
966 if (isdst_with_error == -1) {
967 goto error;
968 }
969 else {
970 isdst[i] = (unsigned char)isdst_with_error;
971 }
972 }
973
974 dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long));
975 if (dstoff == NULL) {
976 goto error;
977 }
978
979 // Derive dstoff and trans_list_wall from the information we've loaded
980 utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions,
981 self->num_ttinfos);
982
983 if (ts_to_local(trans_idx, self->trans_list_utc, utcoff,
984 self->trans_list_wall, self->num_ttinfos,
985 self->num_transitions)) {
986 goto error;
987 }
988
989 // Build _ttinfo objects from utcoff, dstoff and abbr
990 self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo));
991 for (size_t i = 0; i < self->num_ttinfos; ++i) {
992 PyObject *tzname = PyTuple_GetItem(abbr, i);
993 if (tzname == NULL) {
994 goto error;
995 }
996
997 ttinfos_allocated++;
998 if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) {
999 goto error;
1000 }
1001 }
1002
1003 // Build our mapping from transition to the ttinfo that applies
1004 self->trans_ttinfos =
1005 PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *));
1006 for (size_t i = 0; i < self->num_transitions; ++i) {
1007 size_t ttinfo_idx = trans_idx[i];
1008 assert(ttinfo_idx < self->num_ttinfos);
1009 self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]);
1010 }
1011
1012 // Set ttinfo_before to the first non-DST transition
1013 for (size_t i = 0; i < self->num_ttinfos; ++i) {
1014 if (!isdst[i]) {
1015 self->ttinfo_before = &(self->_ttinfos[i]);
1016 break;
1017 }
1018 }
1019
1020 // If there are only DST ttinfos, pick the first one, if there are no
1021 // ttinfos at all, set ttinfo_before to NULL
1022 if (self->ttinfo_before == NULL && self->num_ttinfos > 0) {
1023 self->ttinfo_before = &(self->_ttinfos[0]);
1024 }
1025
1026 if (tz_str != Py_None && PyObject_IsTrue(tz_str)) {
1027 if (parse_tz_str(tz_str, &(self->tzrule_after))) {
1028 goto error;
1029 }
1030 }
1031 else {
1032 if (!self->num_ttinfos) {
1033 PyErr_Format(PyExc_ValueError, "No time zone information found.");
1034 goto error;
1035 }
1036
1037 size_t idx;
1038 if (!self->num_transitions) {
1039 idx = self->num_ttinfos - 1;
1040 }
1041 else {
1042 idx = trans_idx[self->num_transitions - 1];
1043 }
1044
1045 _ttinfo *tti = &(self->_ttinfos[idx]);
1046 build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL,
1047 &(self->tzrule_after));
1048
1049 // We've abused the build_tzrule constructor to construct an STD-only
1050 // rule mimicking whatever ttinfo we've picked up, but it's possible
1051 // that the one we've picked up is a DST zone, so we need to make sure
1052 // that the dstoff is set correctly in that case.
1053 if (PyObject_IsTrue(tti->dstoff)) {
1054 _ttinfo *tti_after = &(self->tzrule_after.std);
1055 Py_DECREF(tti_after->dstoff);
1056 tti_after->dstoff = tti->dstoff;
1057 Py_INCREF(tti_after->dstoff);
1058 }
1059 }
1060
1061 // Determine if this is a "fixed offset" zone, meaning that the output of
1062 // the utcoffset, dst and tzname functions does not depend on the specific
1063 // datetime passed.
1064 //
1065 // We make three simplifying assumptions here:
1066 //
1067 // 1. If tzrule_after is not std_only, it has transitions that might occur
1068 // (it is possible to construct TZ strings that specify STD and DST but
1069 // no transitions ever occur, such as AAA0BBB,0/0,J365/25).
1070 // 2. If self->_ttinfos contains more than one _ttinfo object, the objects
1071 // represent different offsets.
1072 // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise
1073 // fixed-offset zone with extra _ttinfos defined may appear to *not* be
1074 // a fixed offset zone).
1075 //
1076 // Violations to these assumptions would be fairly exotic, and exotic
1077 // zones should almost certainly not be used with datetime.time (the
1078 // only thing that would be affected by this).
1079 if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) {
1080 self->fixed_offset = 0;
1081 }
1082 else if (self->num_ttinfos == 0) {
1083 self->fixed_offset = 1;
1084 }
1085 else {
1086 int constant_offset =
1087 ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std);
1088 if (constant_offset < 0) {
1089 goto error;
1090 }
1091 else {
1092 self->fixed_offset = constant_offset;
1093 }
1094 }
1095
1096 int rv = 0;
1097 goto cleanup;
1098error:
1099 // These resources only need to be freed if we have failed, if we succeed
1100 // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc
1101 // method to free the relevant resources.
1102 if (self->trans_list_utc != NULL) {
1103 PyMem_Free(self->trans_list_utc);
1104 self->trans_list_utc = NULL;
1105 }
1106
1107 for (size_t i = 0; i < 2; ++i) {
1108 if (self->trans_list_wall[i] != NULL) {
1109 PyMem_Free(self->trans_list_wall[i]);
1110 self->trans_list_wall[i] = NULL;
1111 }
1112 }
1113
1114 if (self->_ttinfos != NULL) {
1115 for (size_t i = 0; i < ttinfos_allocated; ++i) {
1116 xdecref_ttinfo(&(self->_ttinfos[i]));
1117 }
1118 PyMem_Free(self->_ttinfos);
1119 self->_ttinfos = NULL;
1120 }
1121
1122 if (self->trans_ttinfos != NULL) {
1123 PyMem_Free(self->trans_ttinfos);
1124 self->trans_ttinfos = NULL;
1125 }
1126
1127 rv = -1;
1128cleanup:
1129 Py_XDECREF(data_tuple);
1130
1131 if (utcoff != NULL) {
1132 PyMem_Free(utcoff);
1133 }
1134
1135 if (dstoff != NULL) {
1136 PyMem_Free(dstoff);
1137 }
1138
1139 if (isdst != NULL) {
1140 PyMem_Free(isdst);
1141 }
1142
1143 if (trans_idx != NULL) {
1144 PyMem_Free(trans_idx);
1145 }
1146
1147 return rv;
1148}
1149
1150/* Function to calculate the local timestamp of a transition from the year. */
1151int64_t
1152calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1153{
1154 CalendarRule *self = (CalendarRule *)base_self;
1155
1156 // We want (year, month, day of month); we have year and month, but we
1157 // need to turn (week, day-of-week) into day-of-month
1158 //
1159 // Week 1 is the first week in which day `day` (where 0 = Sunday) appears.
1160 // Week 5 represents the last occurrence of day `day`, so we need to know
1161 // the first weekday of the month and the number of days in the month.
1162 int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7;
1163 uint8_t days_in_month = DAYS_IN_MONTH[self->month];
1164 if (self->month == 2 && is_leap_year(year)) {
1165 days_in_month += 1;
1166 }
1167
1168 // This equation seems magical, so I'll break it down:
1169 // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day
1170 // + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent
1171 // because this math is mod 7
1172 // 2. Get first day - desired day mod 7 (adjusting by 7 for negative
1173 // numbers so that -1 % 7 = 6).
1174 // 3. Add 1 because month days are a 1-based index.
1175 int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7;
1176 if (month_day < 0) {
1177 month_day += 7;
1178 }
1179 month_day += 1;
1180
1181 // Now use a 0-based index version of `week` to calculate the w-th
1182 // occurrence of `day`
1183 month_day += ((int8_t)(self->week) - 1) * 7;
1184
1185 // month_day will only be > days_in_month if w was 5, and `w` means "last
1186 // occurrence of `d`", so now we just check if we over-shot the end of the
1187 // month and if so knock off 1 week.
1188 if (month_day > days_in_month) {
1189 month_day -= 7;
1190 }
1191
1192 int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL;
1193 return ((ordinal * 86400) + (int64_t)(self->hour * 3600) +
1194 (int64_t)(self->minute * 60) + (int64_t)(self->second));
1195}
1196
1197/* Constructor for CalendarRule. */
1198int
1199calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour,
1200 int8_t minute, int8_t second, CalendarRule *out)
1201{
1202 // These bounds come from the POSIX standard, which describes an Mm.n.d
1203 // rule as:
1204 //
1205 // The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <=
1206 // 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which
1207 // may occur in either the fourth or the fifth week). Week 1 is the first
1208 // week in which the d'th day occurs. Day zero is Sunday.
1209 if (month <= 0 || month > 12) {
1210 PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]");
1211 return -1;
1212 }
1213
1214 if (week <= 0 || week > 5) {
1215 PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]");
1216 return -1;
1217 }
1218
1219 // day is an unsigned integer, so day < 0 should always return false, but
1220 // if day's type changes to a signed integer *without* changing this value,
1221 // it may create a bug. Considering that the compiler should be able to
1222 // optimize out the first comparison if day is an unsigned integer anyway,
1223 // we will leave this comparison in place and disable the compiler warning.
1224#pragma GCC diagnostic push
1225#pragma GCC diagnostic ignored "-Wtype-limits"
1226 if (day < 0 || day > 6) {
1227#pragma GCC diagnostic pop
1228 PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]");
1229 return -1;
1230 }
1231
1232 TransitionRuleType base = {&calendarrule_year_to_timestamp};
1233
1234 CalendarRule new_offset = {
1235 .base = base,
1236 .month = month,
1237 .week = week,
1238 .day = day,
1239 .hour = hour,
1240 .minute = minute,
1241 .second = second,
1242 };
1243
1244 *out = new_offset;
1245 return 0;
1246}
1247
1248/* Function to calculate the local timestamp of a transition from the year.
1249 *
1250 * This translates the day of the year into a local timestamp — either a
1251 * 1-based Julian day, not including leap days, or the 0-based year-day,
1252 * including leap days.
1253 * */
1254int64_t
1255dayrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1256{
1257 // The function signature requires a TransitionRuleType pointer, but this
1258 // function is only applicable to DayRule* objects.
1259 DayRule *self = (DayRule *)base_self;
1260
1261 // ymd_to_ord calculates the number of days since 0001-01-01, but we want
1262 // to know the number of days since 1970-01-01, so we must subtract off
1263 // the equivalent of ymd_to_ord(1970, 1, 1).
1264 //
1265 // We subtract off an additional 1 day to account for January 1st (we want
1266 // the number of full days *before* the date of the transition - partial
1267 // days are accounted for in the hour, minute and second portions.
1268 int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1;
1269
1270 // The Julian day specification skips over February 29th in leap years,
1271 // from the POSIX standard:
1272 //
1273 // Leap days shall not be counted. That is, in all years-including leap
1274 // years-February 28 is day 59 and March 1 is day 60. It is impossible to
1275 // refer explicitly to the occasional February 29.
1276 //
1277 // This is actually more useful than you'd think — if you want a rule that
1278 // always transitions on a given calendar day (other than February 29th),
1279 // you would use a Julian day, e.g. J91 always refers to April 1st and J365
1280 // always refers to December 31st.
1281 unsigned int day = self->day;
1282 if (self->julian && day >= 59 && is_leap_year(year)) {
1283 day += 1;
1284 }
1285
1286 return ((days_before_year + day) * 86400) + (self->hour * 3600) +
1287 (self->minute * 60) + self->second;
1288}
1289
1290/* Constructor for DayRule. */
1291static int
1292dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute,
1293 int8_t second, DayRule *out)
1294{
1295 // The POSIX standard specifies that Julian days must be in the range (1 <=
1296 // n <= 365) and that non-Julian (they call it "0-based Julian") days must
1297 // be in the range (0 <= n <= 365).
1298 if (day < julian || day > 365) {
1299 PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u",
1300 julian, day);
1301 return -1;
1302 }
1303
1304 TransitionRuleType base = {
1305 &dayrule_year_to_timestamp,
1306 };
1307
1308 DayRule tmp = {
1309 .base = base,
1310 .julian = julian,
1311 .day = day,
1312 .hour = hour,
1313 .minute = minute,
1314 .second = second,
1315 };
1316
1317 *out = tmp;
1318
1319 return 0;
1320}
1321
1322/* Calculate the start and end rules for a _tzrule in the given year. */
1323static void
1324tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end)
1325{
1326 assert(rule->start != NULL);
1327 assert(rule->end != NULL);
1328 *start = rule->start->year_to_timestamp(rule->start, year);
1329 *end = rule->end->year_to_timestamp(rule->end, year);
1330}
1331
1332/* Calculate the _ttinfo that applies at a given local time from a _tzrule.
1333 *
1334 * This takes a local timestamp and fold for disambiguation purposes; the year
1335 * could technically be calculated from the timestamp, but given that the
1336 * callers of this function already have the year information accessible from
1337 * the datetime struct, it is taken as an additional parameter to reduce
1338 * unncessary calculation.
1339 * */
1340static _ttinfo *
1341find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year)
1342{
1343 if (rule->std_only) {
1344 return &(rule->std);
1345 }
1346
1347 int64_t start, end;
1348 uint8_t isdst;
1349
1350 tzrule_transitions(rule, year, &start, &end);
1351
1352 // With fold = 0, the period (denominated in local time) with the smaller
1353 // offset starts at the end of the gap and ends at the end of the fold;
1354 // with fold = 1, it runs from the start of the gap to the beginning of the
1355 // fold.
1356 //
1357 // So in order to determine the DST boundaries we need to know both the
1358 // fold and whether DST is positive or negative (rare), and it turns out
1359 // that this boils down to fold XOR is_positive.
1360 if (fold == (rule->dst_diff >= 0)) {
1361 end -= rule->dst_diff;
1362 }
1363 else {
1364 start += rule->dst_diff;
1365 }
1366
1367 if (start < end) {
1368 isdst = (ts >= start) && (ts < end);
1369 }
1370 else {
1371 isdst = (ts < end) || (ts >= start);
1372 }
1373
1374 if (isdst) {
1375 return &(rule->dst);
1376 }
1377 else {
1378 return &(rule->std);
1379 }
1380}
1381
1382/* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time.
1383 *
1384 * This function can determine the _ttinfo that applies at a given epoch time,
1385 * (analogous to trans_list_utc), and whether or not the datetime is in a fold.
1386 * This is to be used in the .fromutc() function.
1387 *
1388 * The year is technically a redundant parameter, because it can be calculated
1389 * from the timestamp, but all callers of this function should have the year
1390 * in the datetime struct anyway, so taking it as a parameter saves unnecessary
1391 * calculation.
1392 **/
1393static _ttinfo *
1394find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
1395 unsigned char *fold)
1396{
1397 if (rule->std_only) {
1398 *fold = 0;
1399 return &(rule->std);
1400 }
1401
1402 int64_t start, end;
1403 uint8_t isdst;
1404 tzrule_transitions(rule, year, &start, &end);
1405 start -= rule->std.utcoff_seconds;
1406 end -= rule->dst.utcoff_seconds;
1407
1408 if (start < end) {
1409 isdst = (ts >= start) && (ts < end);
1410 }
1411 else {
1412 isdst = (ts < end) || (ts >= start);
1413 }
1414
1415 // For positive DST, the ambiguous period is one dst_diff after the end of
1416 // DST; for negative DST, the ambiguous period is one dst_diff before the
1417 // start of DST.
1418 int64_t ambig_start, ambig_end;
1419 if (rule->dst_diff > 0) {
1420 ambig_start = end;
1421 ambig_end = end + rule->dst_diff;
1422 }
1423 else {
1424 ambig_start = start;
1425 ambig_end = start - rule->dst_diff;
1426 }
1427
1428 *fold = (ts >= ambig_start) && (ts < ambig_end);
1429
1430 if (isdst) {
1431 return &(rule->dst);
1432 }
1433 else {
1434 return &(rule->std);
1435 }
1436}
1437
1438/* Parse a TZ string in the format specified by the POSIX standard:
1439 *
1440 * std offset[dst[offset],start[/time],end[/time]]
1441 *
1442 * std and dst must be 3 or more characters long and must not contain a
1443 * leading colon, embedded digits, commas, nor a plus or minus signs; The
1444 * spaces between "std" and "offset" are only for display and are not actually
1445 * present in the string.
1446 *
1447 * The format of the offset is ``[+|-]hh[:mm[:ss]]``
1448 *
1449 * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3:
1450 *
1451 * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
1452 */
1453static int
1454parse_tz_str(PyObject *tz_str_obj, _tzrule *out)
1455{
1456 PyObject *std_abbr = NULL;
1457 PyObject *dst_abbr = NULL;
1458 TransitionRuleType *start = NULL;
1459 TransitionRuleType *end = NULL;
Dong-hee Naa487a392020-05-22 01:56:03 +09001460 // Initialize offsets to invalid value (> 24 hours)
1461 long std_offset = 1 << 20;
1462 long dst_offset = 1 << 20;
Paul Ganssle62972d92020-05-16 04:20:06 -04001463
1464 char *tz_str = PyBytes_AsString(tz_str_obj);
1465 if (tz_str == NULL) {
1466 return -1;
1467 }
1468 char *p = tz_str;
1469
1470 // Read the `std` abbreviation, which must be at least 3 characters long.
Pablo Galindoe4799b92020-05-27 21:48:12 +01001471 Py_ssize_t num_chars = parse_abbr(p, &std_abbr);
Paul Ganssle62972d92020-05-16 04:20:06 -04001472 if (num_chars < 1) {
1473 PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
1474 goto error;
1475 }
1476
1477 p += num_chars;
1478
1479 // Now read the STD offset, which is required
1480 num_chars = parse_tz_delta(p, &std_offset);
1481 if (num_chars < 0) {
1482 PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj);
1483 goto error;
1484 }
1485 p += num_chars;
1486
1487 // If the string ends here, there is no DST, otherwise we must parse the
1488 // DST abbreviation and start and end dates and times.
1489 if (*p == '\0') {
1490 goto complete;
1491 }
1492
1493 num_chars = parse_abbr(p, &dst_abbr);
1494 if (num_chars < 1) {
1495 PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
1496 goto error;
1497 }
1498 p += num_chars;
1499
1500 if (*p == ',') {
1501 // From the POSIX standard:
1502 //
1503 // If no offset follows dst, the alternative time is assumed to be one
1504 // hour ahead of standard time.
1505 dst_offset = std_offset + 3600;
1506 }
1507 else {
1508 num_chars = parse_tz_delta(p, &dst_offset);
1509 if (num_chars < 0) {
1510 PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R",
1511 tz_str_obj);
1512 goto error;
1513 }
1514
1515 p += num_chars;
1516 }
1517
1518 TransitionRuleType **transitions[2] = {&start, &end};
1519 for (size_t i = 0; i < 2; ++i) {
1520 if (*p != ',') {
1521 PyErr_Format(PyExc_ValueError,
1522 "Missing transition rules in TZ string: %R",
1523 tz_str_obj);
1524 goto error;
1525 }
1526 p++;
1527
1528 num_chars = parse_transition_rule(p, transitions[i]);
1529 if (num_chars < 0) {
1530 PyErr_Format(PyExc_ValueError,
1531 "Malformed transition rule in TZ string: %R",
1532 tz_str_obj);
1533 goto error;
1534 }
1535 p += num_chars;
1536 }
1537
1538 if (*p != '\0') {
1539 PyErr_Format(PyExc_ValueError,
1540 "Extraneous characters at end of TZ string: %R",
1541 tz_str_obj);
1542 goto error;
1543 }
1544
1545complete:
1546 build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out);
1547 Py_DECREF(std_abbr);
1548 Py_XDECREF(dst_abbr);
1549
1550 return 0;
1551error:
1552 Py_XDECREF(std_abbr);
1553 if (dst_abbr != NULL && dst_abbr != Py_None) {
1554 Py_DECREF(dst_abbr);
1555 }
1556
1557 if (start != NULL) {
1558 PyMem_Free(start);
1559 }
1560
1561 if (end != NULL) {
1562 PyMem_Free(end);
1563 }
1564
1565 return -1;
1566}
1567
Pablo Galindoe4799b92020-05-27 21:48:12 +01001568static int
1569parse_uint(const char *const p, uint8_t *value)
Paul Ganssle62972d92020-05-16 04:20:06 -04001570{
1571 if (!isdigit(*p)) {
1572 return -1;
1573 }
1574
Pablo Galindoe4799b92020-05-27 21:48:12 +01001575 *value = (*p) - '0';
1576 return 0;
Paul Ganssle62972d92020-05-16 04:20:06 -04001577}
1578
1579/* Parse the STD and DST abbreviations from a TZ string. */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001580static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001581parse_abbr(const char *const p, PyObject **abbr)
1582{
1583 const char *ptr = p;
1584 char buff = *ptr;
1585 const char *str_start;
1586 const char *str_end;
1587
1588 if (*ptr == '<') {
1589 ptr++;
1590 str_start = ptr;
1591 while ((buff = *ptr) != '>') {
1592 // From the POSIX standard:
1593 //
1594 // In the quoted form, the first character shall be the less-than
1595 // ( '<' ) character and the last character shall be the
1596 // greater-than ( '>' ) character. All characters between these
1597 // quoting characters shall be alphanumeric characters from the
1598 // portable character set in the current locale, the plus-sign (
1599 // '+' ) character, or the minus-sign ( '-' ) character. The std
1600 // and dst fields in this case shall not include the quoting
1601 // characters.
1602 if (!isalpha(buff) && !isdigit(buff) && buff != '+' &&
1603 buff != '-') {
1604 return -1;
1605 }
1606 ptr++;
1607 }
1608 str_end = ptr;
1609 ptr++;
1610 }
1611 else {
1612 str_start = p;
1613 // From the POSIX standard:
1614 //
1615 // In the unquoted form, all characters in these fields shall be
1616 // alphabetic characters from the portable character set in the
1617 // current locale.
1618 while (isalpha(*ptr)) {
1619 ptr++;
1620 }
1621 str_end = ptr;
1622 }
1623
1624 *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start);
1625 if (abbr == NULL) {
1626 return -1;
1627 }
1628
1629 return ptr - p;
1630}
1631
1632/* Parse a UTC offset from a TZ str. */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001633static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001634parse_tz_delta(const char *const p, long *total_seconds)
1635{
1636 // From the POSIX spec:
1637 //
1638 // Indicates the value added to the local time to arrive at Coordinated
1639 // Universal Time. The offset has the form:
1640 //
1641 // hh[:mm[:ss]]
1642 //
1643 // One or more digits may be used; the value is always interpreted as a
1644 // decimal number.
1645 //
1646 // The POSIX spec says that the values for `hour` must be between 0 and 24
1647 // hours, but RFC 8536 §3.3.1 specifies that the hours part of the
1648 // transition times may be signed and range from -167 to 167.
1649 long sign = -1;
1650 long hours = 0;
1651 long minutes = 0;
1652 long seconds = 0;
1653
1654 const char *ptr = p;
1655 char buff = *ptr;
1656 if (buff == '-' || buff == '+') {
1657 // Negative numbers correspond to *positive* offsets, from the spec:
1658 //
1659 // If preceded by a '-', the timezone shall be east of the Prime
1660 // Meridian; otherwise, it shall be west (which may be indicated by
1661 // an optional preceding '+' ).
1662 if (buff == '-') {
1663 sign = 1;
1664 }
1665
1666 ptr++;
1667 }
1668
1669 // The hour can be 1 or 2 numeric characters
1670 for (size_t i = 0; i < 2; ++i) {
1671 buff = *ptr;
1672 if (!isdigit(buff)) {
1673 if (i == 0) {
1674 return -1;
1675 }
1676 else {
1677 break;
1678 }
1679 }
1680
1681 hours *= 10;
1682 hours += buff - '0';
1683 ptr++;
1684 }
1685
1686 if (hours > 24 || hours < 0) {
1687 return -1;
1688 }
1689
1690 // Minutes and seconds always of the format ":dd"
1691 long *outputs[2] = {&minutes, &seconds};
1692 for (size_t i = 0; i < 2; ++i) {
1693 if (*ptr != ':') {
1694 goto complete;
1695 }
1696 ptr++;
1697
1698 for (size_t j = 0; j < 2; ++j) {
1699 buff = *ptr;
1700 if (!isdigit(buff)) {
1701 return -1;
1702 }
1703 *(outputs[i]) *= 10;
1704 *(outputs[i]) += buff - '0';
1705 ptr++;
1706 }
1707 }
1708
1709complete:
1710 *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds);
1711
1712 return ptr - p;
1713}
1714
1715/* Parse the date portion of a transition rule. */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001716static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001717parse_transition_rule(const char *const p, TransitionRuleType **out)
1718{
1719 // The full transition rule indicates when to change back and forth between
1720 // STD and DST, and has the form:
1721 //
1722 // date[/time],date[/time]
1723 //
1724 // This function parses an individual date[/time] section, and returns
1725 // the number of characters that contributed to the transition rule. This
1726 // does not include the ',' at the end of the first rule.
1727 //
1728 // The POSIX spec states that if *time* is not given, the default is 02:00.
1729 const char *ptr = p;
1730 int8_t hour = 2;
1731 int8_t minute = 0;
1732 int8_t second = 0;
1733
1734 // Rules come in one of three flavors:
1735 //
1736 // 1. Jn: Julian day n, with no leap days.
1737 // 2. n: Day of year (0-based, with leap days)
1738 // 3. Mm.n.d: Specifying by month, week and day-of-week.
1739
1740 if (*ptr == 'M') {
1741 uint8_t month, week, day;
1742 ptr++;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001743 if (parse_uint(ptr, &month)) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001744 return -1;
1745 }
Paul Ganssle62972d92020-05-16 04:20:06 -04001746 ptr++;
1747 if (*ptr != '.') {
Pablo Galindoe4799b92020-05-27 21:48:12 +01001748 uint8_t tmp;
1749 if (parse_uint(ptr, &tmp)) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001750 return -1;
1751 }
1752
1753 month *= 10;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001754 month += tmp;
Paul Ganssle62972d92020-05-16 04:20:06 -04001755 ptr++;
1756 }
1757
1758 uint8_t *values[2] = {&week, &day};
1759 for (size_t i = 0; i < 2; ++i) {
1760 if (*ptr != '.') {
1761 return -1;
1762 }
1763 ptr++;
1764
Pablo Galindoe4799b92020-05-27 21:48:12 +01001765 if (parse_uint(ptr, values[i])) {
Paul Ganssle62972d92020-05-16 04:20:06 -04001766 return -1;
1767 }
1768 ptr++;
Paul Ganssle62972d92020-05-16 04:20:06 -04001769 }
1770
1771 if (*ptr == '/') {
1772 ptr++;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001773 Py_ssize_t num_chars =
Paul Ganssle62972d92020-05-16 04:20:06 -04001774 parse_transition_time(ptr, &hour, &minute, &second);
1775 if (num_chars < 0) {
1776 return -1;
1777 }
1778 ptr += num_chars;
1779 }
1780
1781 CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
1782 if (rv == NULL) {
1783 return -1;
1784 }
1785
1786 if (calendarrule_new(month, week, day, hour, minute, second, rv)) {
1787 PyMem_Free(rv);
1788 return -1;
1789 }
1790
1791 *out = (TransitionRuleType *)rv;
1792 }
1793 else {
1794 uint8_t julian = 0;
1795 unsigned int day = 0;
1796 if (*ptr == 'J') {
1797 julian = 1;
1798 ptr++;
1799 }
1800
1801 for (size_t i = 0; i < 3; ++i) {
1802 if (!isdigit(*ptr)) {
1803 if (i == 0) {
1804 return -1;
1805 }
1806 break;
1807 }
1808 day *= 10;
1809 day += (*ptr) - '0';
1810 ptr++;
1811 }
1812
1813 if (*ptr == '/') {
1814 ptr++;
Pablo Galindoe4799b92020-05-27 21:48:12 +01001815 Py_ssize_t num_chars =
Paul Ganssle62972d92020-05-16 04:20:06 -04001816 parse_transition_time(ptr, &hour, &minute, &second);
1817 if (num_chars < 0) {
1818 return -1;
1819 }
1820 ptr += num_chars;
1821 }
1822
1823 DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
1824 if (rv == NULL) {
1825 return -1;
1826 }
1827
1828 if (dayrule_new(julian, day, hour, minute, second, rv)) {
1829 PyMem_Free(rv);
1830 return -1;
1831 }
1832 *out = (TransitionRuleType *)rv;
1833 }
1834
1835 return ptr - p;
1836}
1837
1838/* Parse the time portion of a transition rule (e.g. following an /) */
Pablo Galindoe4799b92020-05-27 21:48:12 +01001839static Py_ssize_t
Paul Ganssle62972d92020-05-16 04:20:06 -04001840parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
1841 int8_t *second)
1842{
1843 // From the spec:
1844 //
1845 // The time has the same format as offset except that no leading sign
1846 // ( '-' or '+' ) is allowed.
1847 //
1848 // The format for the offset is:
1849 //
1850 // h[h][:mm[:ss]]
1851 //
1852 // RFC 8536 also allows transition times to be signed and to range from
1853 // -167 to +167, but the current version only supports [0, 99].
1854 //
1855 // TODO: Support the full range of transition hours.
1856 int8_t *components[3] = {hour, minute, second};
1857 const char *ptr = p;
1858 int8_t sign = 1;
1859
1860 if (*ptr == '-' || *ptr == '+') {
1861 if (*ptr == '-') {
1862 sign = -1;
1863 }
1864 ptr++;
1865 }
1866
1867 for (size_t i = 0; i < 3; ++i) {
1868 if (i > 0) {
1869 if (*ptr != ':') {
1870 break;
1871 }
1872 ptr++;
1873 }
1874
1875 uint8_t buff = 0;
1876 for (size_t j = 0; j < 2; j++) {
1877 if (!isdigit(*ptr)) {
1878 if (i == 0 && j > 0) {
1879 break;
1880 }
1881 return -1;
1882 }
1883
1884 buff *= 10;
1885 buff += (*ptr) - '0';
1886 ptr++;
1887 }
1888
1889 *(components[i]) = sign * buff;
1890 }
1891
1892 return ptr - p;
1893}
1894
1895/* Constructor for a _tzrule.
1896 *
1897 * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which
1898 * case `dst_offset` will be ignored and `start` and `end` are expected to be
1899 * NULL as well.
1900 *
1901 * Returns 0 on success.
1902 */
1903static int
1904build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
1905 long dst_offset, TransitionRuleType *start,
1906 TransitionRuleType *end, _tzrule *out)
1907{
Dong-hee Naa487a392020-05-22 01:56:03 +09001908 _tzrule rv = {{0}};
Paul Ganssle62972d92020-05-16 04:20:06 -04001909
1910 rv.start = start;
1911 rv.end = end;
1912
1913 if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) {
1914 goto error;
1915 }
1916
1917 if (dst_abbr != NULL) {
1918 rv.dst_diff = dst_offset - std_offset;
1919 if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) {
1920 goto error;
1921 }
1922 }
1923 else {
1924 rv.std_only = 1;
1925 }
1926
1927 *out = rv;
1928
1929 return 0;
1930error:
1931 xdecref_ttinfo(&rv.std);
1932 xdecref_ttinfo(&rv.dst);
1933 return -1;
1934}
1935
1936/* Destructor for _tzrule. */
1937static void
1938free_tzrule(_tzrule *tzrule)
1939{
1940 xdecref_ttinfo(&(tzrule->std));
1941 if (!tzrule->std_only) {
1942 xdecref_ttinfo(&(tzrule->dst));
1943 }
1944
1945 if (tzrule->start != NULL) {
1946 PyMem_Free(tzrule->start);
1947 }
1948
1949 if (tzrule->end != NULL) {
1950 PyMem_Free(tzrule->end);
1951 }
1952}
1953
1954/* Calculate DST offsets from transitions and UTC offsets
1955 *
1956 * This is necessary because each C `ttinfo` only contains the UTC offset,
1957 * time zone abbreviation and an isdst boolean - it does not include the
1958 * amount of the DST offset, but we need the amount for the dst() function.
1959 *
1960 * Thus function uses heuristics to infer what the offset should be, so it
1961 * is not guaranteed that this will work for all zones. If we cannot assign
1962 * a value for a given DST offset, we'll assume it's 1H rather than 0H, so
1963 * bool(dt.dst()) will always match ttinfo.isdst.
1964 */
1965static void
1966utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
1967 unsigned char *isdsts, size_t num_transitions,
1968 size_t num_ttinfos)
1969{
1970 size_t dst_count = 0;
1971 size_t dst_found = 0;
1972 for (size_t i = 0; i < num_ttinfos; ++i) {
1973 dst_count++;
1974 }
1975
1976 for (size_t i = 1; i < num_transitions; ++i) {
1977 if (dst_count == dst_found) {
1978 break;
1979 }
1980
1981 size_t idx = trans_idx[i];
1982 size_t comp_idx = trans_idx[i - 1];
1983
1984 // Only look at DST offsets that have nto been assigned already
1985 if (!isdsts[idx] || dstoffs[idx] != 0) {
1986 continue;
1987 }
1988
1989 long dstoff = 0;
1990 long utcoff = utcoffs[idx];
1991
1992 if (!isdsts[comp_idx]) {
1993 dstoff = utcoff - utcoffs[comp_idx];
1994 }
1995
1996 if (!dstoff && idx < (num_ttinfos - 1)) {
1997 comp_idx = trans_idx[i + 1];
1998
1999 // If the following transition is also DST and we couldn't find
2000 // the DST offset by this point, we're going to have to skip it
2001 // and hope this transition gets assigned later
2002 if (isdsts[comp_idx]) {
2003 continue;
2004 }
2005
2006 dstoff = utcoff - utcoffs[comp_idx];
2007 }
2008
2009 if (dstoff) {
2010 dst_found++;
2011 dstoffs[idx] = dstoff;
2012 }
2013 }
2014
2015 if (dst_found < dst_count) {
2016 // If there are time zones we didn't find a value for, we'll end up
2017 // with dstoff = 0 for something where isdst=1. This is obviously
2018 // wrong — one hour will be a much better guess than 0.
2019 for (size_t idx = 0; idx < num_ttinfos; ++idx) {
2020 if (isdsts[idx] && !dstoffs[idx]) {
2021 dstoffs[idx] = 3600;
2022 }
2023 }
2024 }
2025}
2026
2027#define _swap(x, y, buffer) \
2028 buffer = x; \
2029 x = y; \
2030 y = buffer;
2031
2032/* Calculate transitions in local time from UTC time and offsets.
2033 *
2034 * We want to know when each transition occurs, denominated in the number of
2035 * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in
2036 * *local time* (note: this is *not* equivalent to the output of
2037 * datetime.timestamp, which is the total number of seconds actual elapsed
2038 * since 1970-01-01T00:00:00Z in UTC).
2039 *
2040 * This is an ambiguous question because "local time" can be ambiguous — but it
2041 * is disambiguated by the `fold` parameter, so we allocate two arrays:
2042 *
2043 * trans_local[0]: The wall-time transitions for fold=0
2044 * trans_local[1]: The wall-time transitions for fold=1
2045 *
2046 * This returns 0 on success and a negative number of failure. The trans_local
2047 * arrays must be freed if they are not NULL.
2048 */
2049static int
2050ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
2051 int64_t *trans_local[2], size_t num_ttinfos,
2052 size_t num_transitions)
2053{
2054 if (num_transitions == 0) {
2055 return 0;
2056 }
2057
2058 // Copy the UTC transitions into each array to be modified in place later
2059 for (size_t i = 0; i < 2; ++i) {
2060 trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t));
2061 if (trans_local[i] == NULL) {
2062 return -1;
2063 }
2064
2065 memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t));
2066 }
2067
2068 int64_t offset_0, offset_1, buff;
2069 if (num_ttinfos > 1) {
2070 offset_0 = utcoff[0];
2071 offset_1 = utcoff[trans_idx[0]];
2072
2073 if (offset_1 > offset_0) {
2074 _swap(offset_0, offset_1, buff);
2075 }
2076 }
2077 else {
2078 offset_0 = utcoff[0];
2079 offset_1 = utcoff[0];
2080 }
2081
2082 trans_local[0][0] += offset_0;
2083 trans_local[1][0] += offset_1;
2084
2085 for (size_t i = 1; i < num_transitions; ++i) {
2086 offset_0 = utcoff[trans_idx[i - 1]];
2087 offset_1 = utcoff[trans_idx[i]];
2088
2089 if (offset_1 > offset_0) {
2090 _swap(offset_1, offset_0, buff);
2091 }
2092
2093 trans_local[0][i] += offset_0;
2094 trans_local[1][i] += offset_1;
2095 }
2096
2097 return 0;
2098}
2099
2100/* Simple bisect_right binary search implementation */
2101static size_t
2102_bisect(const int64_t value, const int64_t *arr, size_t size)
2103{
2104 size_t lo = 0;
2105 size_t hi = size;
2106 size_t m;
2107
2108 while (lo < hi) {
2109 m = (lo + hi) / 2;
2110 if (arr[m] > value) {
2111 hi = m;
2112 }
2113 else {
2114 lo = m + 1;
2115 }
2116 }
2117
2118 return hi;
2119}
2120
2121/* Find the ttinfo rules that apply at a given local datetime. */
2122static _ttinfo *
2123find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt)
2124{
2125 // datetime.time has a .tzinfo attribute that passes None as the dt
2126 // argument; it only really has meaning for fixed-offset zones.
2127 if (dt == Py_None) {
2128 if (self->fixed_offset) {
2129 return &(self->tzrule_after.std);
2130 }
2131 else {
2132 return &NO_TTINFO;
2133 }
2134 }
2135
2136 int64_t ts;
2137 if (get_local_timestamp(dt, &ts)) {
2138 return NULL;
2139 }
2140
2141 unsigned char fold = PyDateTime_DATE_GET_FOLD(dt);
2142 assert(fold < 2);
2143 int64_t *local_transitions = self->trans_list_wall[fold];
2144 size_t num_trans = self->num_transitions;
2145
2146 if (num_trans && ts < local_transitions[0]) {
2147 return self->ttinfo_before;
2148 }
2149 else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) {
2150 return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold,
2151 PyDateTime_GET_YEAR(dt));
2152 }
2153 else {
2154 size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1;
2155 assert(idx < self->num_transitions);
2156 return self->trans_ttinfos[idx];
2157 }
2158}
2159
2160static int
2161is_leap_year(int year)
2162{
2163 const unsigned int ayear = (unsigned int)year;
2164 return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0);
2165}
2166
2167/* Calculates ordinal datetime from year, month and day. */
2168static int
2169ymd_to_ord(int y, int m, int d)
2170{
2171 y -= 1;
2172 int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400);
2173 int yearday = DAYS_BEFORE_MONTH[m];
2174 if (m > 2 && is_leap_year(y + 1)) {
2175 yearday += 1;
2176 }
2177
2178 return days_before_year + yearday + d;
2179}
2180
2181/* Calculate the number of seconds since 1970-01-01 in local time.
2182 *
2183 * This gets a datetime in the same "units" as self->trans_list_wall so that we
2184 * can easily determine which transitions a datetime falls between. See the
2185 * comment above ts_to_local for more information.
2186 * */
2187static int
2188get_local_timestamp(PyObject *dt, int64_t *local_ts)
2189{
2190 assert(local_ts != NULL);
2191
2192 int hour, minute, second;
2193 int ord;
2194 if (PyDateTime_CheckExact(dt)) {
2195 int y = PyDateTime_GET_YEAR(dt);
2196 int m = PyDateTime_GET_MONTH(dt);
2197 int d = PyDateTime_GET_DAY(dt);
2198 hour = PyDateTime_DATE_GET_HOUR(dt);
2199 minute = PyDateTime_DATE_GET_MINUTE(dt);
2200 second = PyDateTime_DATE_GET_SECOND(dt);
2201
2202 ord = ymd_to_ord(y, m, d);
2203 }
2204 else {
2205 PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL);
2206 if (num == NULL) {
2207 return -1;
2208 }
2209
2210 ord = PyLong_AsLong(num);
2211 Py_DECREF(num);
2212 if (ord == -1 && PyErr_Occurred()) {
2213 return -1;
2214 }
2215
2216 num = PyObject_GetAttrString(dt, "hour");
2217 if (num == NULL) {
2218 return -1;
2219 }
2220 hour = PyLong_AsLong(num);
2221 Py_DECREF(num);
2222 if (hour == -1) {
2223 return -1;
2224 }
2225
2226 num = PyObject_GetAttrString(dt, "minute");
2227 if (num == NULL) {
2228 return -1;
2229 }
2230 minute = PyLong_AsLong(num);
2231 Py_DECREF(num);
2232 if (minute == -1) {
2233 return -1;
2234 }
2235
2236 num = PyObject_GetAttrString(dt, "second");
2237 if (num == NULL) {
2238 return -1;
2239 }
2240 second = PyLong_AsLong(num);
2241 Py_DECREF(num);
2242 if (second == -1) {
2243 return -1;
2244 }
2245 }
2246
2247 *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 +
2248 (int64_t)(hour * 3600 + minute * 60 + second);
2249
2250 return 0;
2251}
2252
2253/////
2254// Functions for cache handling
2255
2256/* Constructor for StrongCacheNode */
2257static StrongCacheNode *
2258strong_cache_node_new(PyObject *key, PyObject *zone)
2259{
2260 StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode));
2261 if (node == NULL) {
2262 return NULL;
2263 }
2264
2265 Py_INCREF(key);
2266 Py_INCREF(zone);
2267
2268 node->next = NULL;
2269 node->prev = NULL;
2270 node->key = key;
2271 node->zone = zone;
2272
2273 return node;
2274}
2275
2276/* Destructor for StrongCacheNode */
2277void
2278strong_cache_node_free(StrongCacheNode *node)
2279{
2280 Py_XDECREF(node->key);
2281 Py_XDECREF(node->zone);
2282
2283 PyMem_Free(node);
2284}
2285
2286/* Frees all nodes at or after a specified root in the strong cache.
2287 *
2288 * This can be used on the root node to free the entire cache or it can be used
2289 * to clear all nodes that have been expired (which, if everything is going
2290 * right, will actually only be 1 node at a time).
2291 */
2292void
2293strong_cache_free(StrongCacheNode *root)
2294{
2295 StrongCacheNode *node = root;
2296 StrongCacheNode *next_node;
2297 while (node != NULL) {
2298 next_node = node->next;
2299 strong_cache_node_free(node);
2300
2301 node = next_node;
2302 }
2303}
2304
2305/* Removes a node from the cache and update its neighbors.
2306 *
2307 * This is used both when ejecting a node from the cache and when moving it to
2308 * the front of the cache.
2309 */
2310static void
2311remove_from_strong_cache(StrongCacheNode *node)
2312{
2313 if (ZONEINFO_STRONG_CACHE == node) {
2314 ZONEINFO_STRONG_CACHE = node->next;
2315 }
2316
2317 if (node->prev != NULL) {
2318 node->prev->next = node->next;
2319 }
2320
2321 if (node->next != NULL) {
2322 node->next->prev = node->prev;
2323 }
2324
2325 node->next = NULL;
2326 node->prev = NULL;
2327}
2328
2329/* Retrieves the node associated with a key, if it exists.
2330 *
2331 * This traverses the strong cache until it finds a matching key and returns a
2332 * pointer to the relevant node if found. Returns NULL if no node is found.
2333 *
2334 * root may be NULL, indicating an empty cache.
2335 */
2336static StrongCacheNode *
2337find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key)
2338{
2339 const StrongCacheNode *node = root;
2340 while (node != NULL) {
2341 if (PyObject_RichCompareBool(key, node->key, Py_EQ)) {
2342 return (StrongCacheNode *)node;
2343 }
2344
2345 node = node->next;
2346 }
2347
2348 return NULL;
2349}
2350
2351/* Ejects a given key from the class's strong cache, if applicable.
2352 *
2353 * This function is used to enable the per-key functionality in clear_cache.
2354 */
2355static void
2356eject_from_strong_cache(const PyTypeObject *const type, PyObject *key)
2357{
2358 if (type != &PyZoneInfo_ZoneInfoType) {
2359 return;
2360 }
2361
2362 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2363 if (node != NULL) {
2364 remove_from_strong_cache(node);
2365
2366 strong_cache_node_free(node);
2367 }
2368}
2369
2370/* Moves a node to the front of the LRU cache.
2371 *
2372 * The strong cache is an LRU cache, so whenever a given node is accessed, if
2373 * it is not at the front of the cache, it needs to be moved there.
2374 */
2375static void
2376move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node)
2377{
2378 StrongCacheNode *root_p = *root;
2379 if (root_p == node) {
2380 return;
2381 }
2382
2383 remove_from_strong_cache(node);
2384
2385 node->prev = NULL;
2386 node->next = root_p;
2387
2388 if (root_p != NULL) {
2389 root_p->prev = node;
2390 }
2391
2392 *root = node;
2393}
2394
2395/* Retrieves a ZoneInfo from the strong cache if it's present.
2396 *
2397 * This function finds the ZoneInfo by key and if found will move the node to
2398 * the front of the LRU cache and return a new reference to it. It returns NULL
2399 * if the key is not in the cache.
2400 *
2401 * The strong cache is currently only implemented for the base class, so this
2402 * always returns a cache miss for subclasses.
2403 */
2404static PyObject *
2405zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key)
2406{
2407 if (type != &PyZoneInfo_ZoneInfoType) {
2408 return NULL; // Strong cache currently only implemented for base class
2409 }
2410
2411 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2412
2413 if (node != NULL) {
2414 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node);
2415 Py_INCREF(node->zone);
2416 return node->zone;
2417 }
2418
2419 return NULL; // Cache miss
2420}
2421
2422/* Inserts a new key into the strong LRU cache.
2423 *
2424 * This function is only to be used after a cache miss — it creates a new node
2425 * at the front of the cache and ejects any stale entries (keeping the size of
2426 * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE).
2427 */
2428static void
2429update_strong_cache(const PyTypeObject *const type, PyObject *key,
2430 PyObject *zone)
2431{
2432 if (type != &PyZoneInfo_ZoneInfoType) {
2433 return;
2434 }
2435
2436 StrongCacheNode *new_node = strong_cache_node_new(key, zone);
2437
2438 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node);
2439
2440 StrongCacheNode *node = new_node->next;
2441 for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) {
2442 if (node == NULL) {
2443 return;
2444 }
2445 node = node->next;
2446 }
2447
2448 // Everything beyond this point needs to be freed
2449 if (node != NULL) {
2450 if (node->prev != NULL) {
2451 node->prev->next = NULL;
2452 }
2453 strong_cache_free(node);
2454 }
2455}
2456
2457/* Clears all entries into a type's strong cache.
2458 *
2459 * Because the strong cache is not implemented for subclasses, this is a no-op
2460 * for everything except the base class.
2461 */
2462void
2463clear_strong_cache(const PyTypeObject *const type)
2464{
2465 if (type != &PyZoneInfo_ZoneInfoType) {
2466 return;
2467 }
2468
2469 strong_cache_free(ZONEINFO_STRONG_CACHE);
2470}
2471
2472static PyObject *
2473new_weak_cache()
2474{
2475 PyObject *weakref_module = PyImport_ImportModule("weakref");
2476 if (weakref_module == NULL) {
2477 return NULL;
2478 }
2479
2480 PyObject *weak_cache =
2481 PyObject_CallMethod(weakref_module, "WeakValueDictionary", "");
2482 Py_DECREF(weakref_module);
2483 return weak_cache;
2484}
2485
2486static int
2487initialize_caches()
2488{
Ammar Askar06a1b892020-05-22 16:10:55 +00002489 // TODO: Move to a PyModule_GetState / PEP 573 based caching system.
Paul Ganssle62972d92020-05-16 04:20:06 -04002490 if (TIMEDELTA_CACHE == NULL) {
2491 TIMEDELTA_CACHE = PyDict_New();
2492 }
2493 else {
2494 Py_INCREF(TIMEDELTA_CACHE);
2495 }
2496
2497 if (TIMEDELTA_CACHE == NULL) {
2498 return -1;
2499 }
2500
2501 if (ZONEINFO_WEAK_CACHE == NULL) {
2502 ZONEINFO_WEAK_CACHE = new_weak_cache();
2503 }
2504 else {
2505 Py_INCREF(ZONEINFO_WEAK_CACHE);
2506 }
2507
2508 if (ZONEINFO_WEAK_CACHE == NULL) {
2509 return -1;
2510 }
2511
2512 return 0;
2513}
2514
2515static PyObject *
2516zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs)
2517{
2518 PyObject *weak_cache = new_weak_cache();
2519 if (weak_cache == NULL) {
2520 return NULL;
2521 }
2522
2523 PyObject_SetAttrString((PyObject *)cls, "_weak_cache", weak_cache);
2524 Py_RETURN_NONE;
2525}
2526
2527/////
2528// Specify the ZoneInfo type
2529static PyMethodDef zoneinfo_methods[] = {
2530 {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache,
2531 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2532 PyDoc_STR("Clear the ZoneInfo cache.")},
2533 {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache,
2534 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2535 PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")},
2536 {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file,
2537 METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2538 PyDoc_STR("Create a ZoneInfo file from a file object.")},
2539 {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O,
2540 PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at "
2541 "the given datetime.")},
2542 {"dst", (PyCFunction)zoneinfo_dst, METH_O,
2543 PyDoc_STR("Retrieve a timedelta representing the amount of DST applied "
2544 "in a zone at the given datetime.")},
2545 {"tzname", (PyCFunction)zoneinfo_tzname, METH_O,
2546 PyDoc_STR("Retrieve a string containing the abbreviation for the time "
2547 "zone that applies in a zone at a given datetime.")},
2548 {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O,
2549 PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted "
2550 "datetime in local time.")},
2551 {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS,
2552 PyDoc_STR("Function for serialization with the pickle protocol.")},
2553 {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS,
2554 PyDoc_STR("Private method used in unpickling.")},
2555 {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass,
2556 METH_VARARGS | METH_KEYWORDS,
2557 PyDoc_STR("Function to initialize subclasses.")},
2558 {NULL} /* Sentinel */
2559};
2560
2561static PyMemberDef zoneinfo_members[] = {
2562 {.name = "key",
2563 .offset = offsetof(PyZoneInfo_ZoneInfo, key),
2564 .type = T_OBJECT_EX,
2565 .flags = READONLY,
2566 .doc = NULL},
2567 {NULL}, /* Sentinel */
2568};
2569
2570static PyTypeObject PyZoneInfo_ZoneInfoType = {
2571 PyVarObject_HEAD_INIT(NULL, 0) //
2572 .tp_name = "zoneinfo.ZoneInfo",
2573 .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo),
2574 .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist),
2575 .tp_repr = (reprfunc)zoneinfo_repr,
2576 .tp_str = (reprfunc)zoneinfo_str,
2577 .tp_getattro = PyObject_GenericGetAttr,
2578 .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE),
2579 /* .tp_doc = zoneinfo_doc, */
2580 .tp_methods = zoneinfo_methods,
2581 .tp_members = zoneinfo_members,
2582 .tp_new = zoneinfo_new,
2583 .tp_dealloc = zoneinfo_dealloc,
2584};
2585
2586/////
2587// Specify the _zoneinfo module
2588static PyMethodDef module_methods[] = {{NULL, NULL}};
2589static void
2590module_free()
2591{
2592 Py_XDECREF(_tzpath_find_tzfile);
2593 _tzpath_find_tzfile = NULL;
2594
2595 Py_XDECREF(_common_mod);
2596 _common_mod = NULL;
2597
2598 Py_XDECREF(io_open);
2599 io_open = NULL;
2600
2601 xdecref_ttinfo(&NO_TTINFO);
2602
Ammar Askar06a1b892020-05-22 16:10:55 +00002603 if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) {
2604 Py_DECREF(TIMEDELTA_CACHE);
2605 } else {
2606 Py_CLEAR(TIMEDELTA_CACHE);
Paul Ganssle62972d92020-05-16 04:20:06 -04002607 }
2608
Ammar Askar06a1b892020-05-22 16:10:55 +00002609 if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) {
2610 Py_DECREF(ZONEINFO_WEAK_CACHE);
2611 } else {
2612 Py_CLEAR(ZONEINFO_WEAK_CACHE);
Paul Ganssle62972d92020-05-16 04:20:06 -04002613 }
2614
2615 strong_cache_free(ZONEINFO_STRONG_CACHE);
2616 ZONEINFO_STRONG_CACHE = NULL;
2617}
2618
2619static int
2620zoneinfomodule_exec(PyObject *m)
2621{
2622 PyDateTime_IMPORT;
2623 PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType;
2624 if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) {
2625 goto error;
2626 }
2627
2628 Py_INCREF(&PyZoneInfo_ZoneInfoType);
2629 PyModule_AddObject(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType);
2630
2631 /* Populate imports */
2632 PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath");
2633 if (_tzpath_module == NULL) {
2634 goto error;
2635 }
2636
2637 _tzpath_find_tzfile =
2638 PyObject_GetAttrString(_tzpath_module, "find_tzfile");
2639 Py_DECREF(_tzpath_module);
2640 if (_tzpath_find_tzfile == NULL) {
2641 goto error;
2642 }
2643
2644 PyObject *io_module = PyImport_ImportModule("io");
2645 if (io_module == NULL) {
2646 goto error;
2647 }
2648
2649 io_open = PyObject_GetAttrString(io_module, "open");
2650 Py_DECREF(io_module);
2651 if (io_open == NULL) {
2652 goto error;
2653 }
2654
2655 _common_mod = PyImport_ImportModule("zoneinfo._common");
2656 if (_common_mod == NULL) {
2657 goto error;
2658 }
2659
2660 if (NO_TTINFO.utcoff == NULL) {
2661 NO_TTINFO.utcoff = Py_None;
2662 NO_TTINFO.dstoff = Py_None;
2663 NO_TTINFO.tzname = Py_None;
2664
2665 for (size_t i = 0; i < 3; ++i) {
2666 Py_INCREF(Py_None);
2667 }
2668 }
2669
2670 if (initialize_caches()) {
2671 goto error;
2672 }
2673
2674 return 0;
2675
2676error:
2677 return -1;
2678}
2679
2680static PyModuleDef_Slot zoneinfomodule_slots[] = {
2681 {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}};
2682
2683static struct PyModuleDef zoneinfomodule = {
2684 PyModuleDef_HEAD_INIT,
2685 .m_name = "_zoneinfo",
2686 .m_doc = "C implementation of the zoneinfo module",
2687 .m_size = 0,
2688 .m_methods = module_methods,
2689 .m_slots = zoneinfomodule_slots,
2690 .m_free = (freefunc)module_free};
2691
2692PyMODINIT_FUNC
2693PyInit__zoneinfo(void)
2694{
2695 return PyModuleDef_Init(&zoneinfomodule);
2696}