1 | //===----------------------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | // For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html |
10 | |
11 | // TODO TZDB look at optimizations |
12 | // |
13 | // The current algorithm is correct but not efficient. For example, in a named |
14 | // rule based continuation finding the next rule does quite a bit of work, |
15 | // returns the next rule and "forgets" its state. This could be better. |
16 | // |
17 | // It would be possible to cache lookups. If a time for a zone is calculated its |
18 | // sys_info could be kept and the next lookup could test whether the time is in |
19 | // a "known" sys_info. The wording in the Standard hints at this slowness by |
20 | // "suggesting" this could be implemented on the user's side. |
21 | |
22 | // TODO TZDB look at removing quirks |
23 | // |
24 | // The code has some special rules to adjust the timing at the continuation |
25 | // switches. This works correctly, but some of the places feel odd. It would be |
26 | // good to investigate this further and see whether all quirks are needed or |
27 | // that there are better fixes. |
28 | // |
29 | // These quirks often use a 12h interval; this is the scan interval of zdump, |
30 | // which implies there are no sys_info objects with a duration of less than 12h. |
31 | |
32 | #include <algorithm> |
33 | #include <cctype> |
34 | #include <chrono> |
35 | #include <expected> |
36 | #include <map> |
37 | #include <ranges> |
38 | |
39 | #include "include/tzdb/time_zone_private.h" |
40 | #include "include/tzdb/tzdb_list_private.h" |
41 | |
42 | // TODO TZDB remove debug printing |
43 | #ifdef PRINT |
44 | # include <print> |
45 | #endif |
46 | |
47 | _LIBCPP_BEGIN_NAMESPACE_STD |
48 | |
49 | #ifdef PRINT |
// Debug-only formatter for chrono::sys_info; it is only compiled when PRINT is
// defined. No format specifiers are consumed by parse().
template <>
struct formatter<chrono::sys_info, char> {
  // Does not parse anything; returns the begin of the format specification.
  template <class ParseContext>
  constexpr typename ParseContext::iterator parse(ParseContext& ctx) {
    return ctx.begin();
  }

  // Writes "[begin, end) offset save abbrev". The offset and save are written
  // using "%Q%q", the other fields use their default formatting.
  template <class FormatContext>
  typename FormatContext::iterator format(const chrono::sys_info& info, FormatContext& ctx) const {
    return std::format_to(
        ctx.out(), "[{}, {}) {:%Q%q} {:%Q%q} {}", info.begin, info.end, info.offset, info.save, info.abbrev);
  }
};
63 | #endif |
64 | |
65 | namespace chrono { |
66 | |
67 | //===----------------------------------------------------------------------===// |
68 | // Details |
69 | //===----------------------------------------------------------------------===// |
70 | |
// A sys_info object together with the information whether it may be merged
// with the result of the next search.
struct __sys_info {
  sys_info __info;
  bool __can_merge; // Can the returned sys_info object be merged with the
                    // next result?
};
75 | |
// Return type for the helper functions that get a sys_info.
// - The expected result returns the "best" sys_info object. This object can be
//   before the requested time. Sometimes sys_info objects from different
//   continuations share their offset, save, and abbrev and these objects are
//   merged to one sys_info object. The __can_merge flag determines whether the
//   current result can be merged with the next result.
// - The unexpected result means no sys_info object was found and the time is
//   the time to be used for the next search iteration.
using __sys_info_result = expected<__sys_info, sys_seconds>;
85 | |
86 | template <ranges::forward_range _Range, |
87 | class _Type, |
88 | class _Proj = identity, |
89 | indirect_strict_weak_order<const _Type*, projected<ranges::iterator_t<_Range>, _Proj>> _Comp = ranges::less> |
90 | [[nodiscard]] static ranges::borrowed_iterator_t<_Range> |
91 | __binary_find(_Range&& __r, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) { |
92 | auto __end = ranges::end(__r); |
93 | auto __ret = ranges::lower_bound(ranges::begin(__r), __end, __value, __comp, __proj); |
94 | if (__ret == __end) |
95 | return __end; |
96 | |
97 | // When the value does not match the predicate it's equal and a valid result |
98 | // was found. |
99 | return !std::invoke(__comp, __value, std::invoke(__proj, *__ret)) ? __ret : __end; |
100 | } |
101 | |
// Format based on https://data.iana.org/time-zones/tz-how-to.html
//
// 1 a time zone abbreviation that is a string of three or more characters that
//   are either ASCII alphanumerics, "+", or "-"
// 2 the string "%z", in which case the "%z" will be replaced by a numeric time
//   zone abbreviation
// 3 a pair of time zone abbreviations separated by a slash ('/'), in which
//   case the first string is the abbreviation for the standard time name and
//   the second string is the abbreviation for the daylight saving time name
// 4 a string containing "%s", in which case the "%s" will be replaced by the
//   text in the appropriate Rule's LETTER column, and the resulting string
//   should be a time zone abbreviation
//
// Rule 1 is not strictly validated since America/Barbados uses a two letter
// abbreviation AT.
//
// Returns the abbreviation created from the FORMAT field of __continuation.
// - __letters is the text used to replace "%s".
// - __save is the SAVE in effect; it selects the part of a "standard/daylight"
//   pair and is added to the STDOFF for "%z".
//
// Throws a runtime_error when the FORMAT field is invalid or when the
// resulting abbreviation is empty.
[[nodiscard]] static string
__format(const __tz::__continuation& __continuation, const string& __letters, seconds __save) {
  bool __shift = false; // true when the previous character was a '%'
  string __result;
  for (char __c : __continuation.__format) {
    if (__shift) {
      switch (__c) {
      case 's':
        std::ranges::copy(__letters, std::back_inserter(__result));
        break;

      case 'z': {
        if (__continuation.__format.size() != 2)
          std::__throw_runtime_error(
              std::format("corrupt tzdb FORMAT field: %z should be the entire contents, instead contains '{}'",
                          __continuation.__format)
                  .c_str());
        chrono::hh_mm_ss __offset{__continuation.__stdoff + __save};
        if (__offset.is_negative()) {
          __result += '-';
          // The sign has been written, format the absolute value.
          __offset = chrono::hh_mm_ss{-(__continuation.__stdoff + __save)};
        } else
          __result += '+';

        // The minutes are only written when they are not zero.
        if (__offset.minutes() != 0min)
          std::format_to(std::back_inserter(__result), "{:%H%M}", __offset);
        else
          std::format_to(std::back_inserter(__result), "{:%H}", __offset);
      } break;

      default:
        std::__throw_runtime_error(
            std::format("corrupt tzdb FORMAT field: invalid sequence '%{}' found, expected %s or %z", __c).c_str());
      }
      __shift = false;

    } else if (__c == '/') {
      // With a SAVE the part after the slash is used, so discard what has been
      // collected. Without a SAVE the part before the slash is the result.
      if (__save != 0s)
        __result.clear();
      else
        break;

    } else if (__c == '%') {
      __shift = true;
    } else if (__c == '+' || __c == '-' || std::isalnum(static_cast<unsigned char>(__c))) {
      // The cast is required; calling isalnum with a negative value (other
      // than EOF) is undefined behaviour and char may be a signed type.
      __result.push_back(__c);
    } else {
      std::__throw_runtime_error(
          std::format(
              "corrupt tzdb FORMAT field: invalid character '{}' found, expected +, -, or an alphanumeric value", __c)
              .c_str());
    }
  }

  if (__shift)
    std::__throw_runtime_error("corrupt tzdb FORMAT field: input ended with the start of the escape sequence '%'");

  if (__result.empty())
    std::__throw_runtime_error("corrupt tzdb FORMAT field: result is empty");

  return __result;
}
179 | |
180 | [[nodiscard]] static sys_seconds __to_sys_seconds(year_month_day __ymd, seconds __seconds) { |
181 | seconds __result = static_cast<sys_days>(__ymd).time_since_epoch() + __seconds; |
182 | return sys_seconds{__result}; |
183 | } |
184 | |
185 | [[nodiscard]] static seconds __at_to_sys_seconds(const __tz::__continuation& __continuation) { |
186 | switch (__continuation.__at.__clock) { |
187 | case __tz::__clock::__local: |
188 | return __continuation.__at.__time - __continuation.__stdoff - |
189 | std::visit( |
190 | [](const auto& __value) { |
191 | using _Tp = decay_t<decltype(__value)>; |
192 | if constexpr (same_as<_Tp, monostate>) |
193 | return chrono::seconds{0}; |
194 | else if constexpr (same_as<_Tp, __tz::__save>) |
195 | return chrono::duration_cast<seconds>(__value.__time); |
196 | else if constexpr (same_as<_Tp, std::string>) |
197 | // For a named rule based continuation the SAVE depends on the RULE |
198 | // active at the end. This should be determined separately. |
199 | return chrono::seconds{0}; |
200 | else |
201 | static_assert(sizeof(_Tp) == 0); // TODO TZDB static_assert(false); after droping clang-16 support |
202 | |
203 | std::__libcpp_unreachable(); |
204 | }, |
205 | __continuation.__rules); |
206 | |
207 | case __tz::__clock::__universal: |
208 | return __continuation.__at.__time; |
209 | |
210 | case __tz::__clock::__standard: |
211 | return __continuation.__at.__time - __continuation.__stdoff; |
212 | } |
213 | std::__libcpp_unreachable(); |
214 | } |
215 | |
216 | [[nodiscard]] static year_month_day __to_year_month_day(year __year, month __month, __tz::__on __on) { |
217 | return std::visit( |
218 | [&](const auto& __value) { |
219 | using _Tp = decay_t<decltype(__value)>; |
220 | if constexpr (same_as<_Tp, chrono::day>) |
221 | return year_month_day{__year, __month, __value}; |
222 | else if constexpr (same_as<_Tp, weekday_last>) |
223 | return year_month_day{static_cast<sys_days>(year_month_weekday_last{__year, __month, __value})}; |
224 | else if constexpr (same_as<_Tp, __tz::__constrained_weekday>) |
225 | return __value(__year, __month); |
226 | else |
227 | static_assert(sizeof(_Tp) == 0); // TODO TZDB static_assert(false); after droping clang-16 support |
228 | |
229 | std::__libcpp_unreachable(); |
230 | }, |
231 | __on); |
232 | } |
233 | |
234 | [[nodiscard]] static sys_seconds __until_to_sys_seconds(const __tz::__continuation& __continuation) { |
235 | // Does UNTIL contain the magic value for the last continuation? |
236 | if (__continuation.__year == chrono::year::min()) |
237 | return sys_seconds::max(); |
238 | |
239 | year_month_day __ymd = chrono::__to_year_month_day(__continuation.__year, __continuation.__in, __continuation.__on); |
240 | return chrono::__to_sys_seconds(__ymd, chrono::__at_to_sys_seconds(__continuation)); |
241 | } |
242 | |
243 | // Holds the UNTIL time for a continuation with a named rule. |
244 | // |
245 | // Unlike continuations with an fixed SAVE named rules have a variable SAVE. |
246 | // This means when the UNTIL uses the local wall time the actual UNTIL value can |
247 | // only be determined when the SAVE is known. This class holds that abstraction. |
248 | class __named_rule_until { |
249 | public: |
250 | explicit __named_rule_until(const __tz::__continuation& __continuation) |
251 | : __until_{chrono::__until_to_sys_seconds(__continuation)}, |
252 | __needs_adjustment_{ |
253 | // The last continuation of a ZONE has no UNTIL which basically is |
254 | // until the end of _local_ time. This value is expressed by |
255 | // sys_seconds::max(). Subtracting the SAVE leaves large value. |
256 | // However SAVE can be negative, which would add a value to maximum |
257 | // leading to undefined behaviour. In practice this often results in |
258 | // an overflow to a very small value. |
259 | __until_ != sys_seconds::max() && __continuation.__at.__clock == __tz::__clock::__local} {} |
260 | |
261 | // Gives the unadjusted until value, this is useful when the SAVE is not known |
262 | // at all. |
263 | sys_seconds __until() const noexcept { return __until_; } |
264 | |
265 | bool __needs_adjustment() const noexcept { return __needs_adjustment_; } |
266 | |
267 | // Returns the UNTIL adjusted for SAVE. |
268 | sys_seconds operator()(seconds __save) const noexcept { return __until_ - __needs_adjustment_ * __save; } |
269 | |
270 | private: |
271 | sys_seconds __until_; |
272 | bool __needs_adjustment_; |
273 | }; |
274 | |
275 | [[nodiscard]] static seconds __at_to_seconds(seconds __stdoff, const __tz::__rule& __rule) { |
276 | switch (__rule.__at.__clock) { |
277 | case __tz::__clock::__local: |
278 | // Local time and standard time behave the same. This is not |
279 | // correct. Local time needs to adjust for the current saved time. |
280 | // To know the saved time the rules need to be known and sorted. |
281 | // This needs a time so to avoid the chicken and egg adjust the |
282 | // saving of the local time later. |
283 | return __rule.__at.__time - __stdoff; |
284 | |
285 | case __tz::__clock::__universal: |
286 | return __rule.__at.__time; |
287 | |
288 | case __tz::__clock::__standard: |
289 | return __rule.__at.__time - __stdoff; |
290 | } |
291 | std::__libcpp_unreachable(); |
292 | } |
293 | |
294 | [[nodiscard]] static sys_seconds __from_to_sys_seconds(seconds __stdoff, const __tz::__rule& __rule, year __year) { |
295 | year_month_day __ymd = chrono::__to_year_month_day(__year, __rule.__in, __rule.__on); |
296 | |
297 | seconds __at = chrono::__at_to_seconds(__stdoff, __rule); |
298 | return chrono::__to_sys_seconds(__ymd, __at); |
299 | } |
300 | |
301 | [[nodiscard]] static sys_seconds __from_to_sys_seconds(seconds __stdoff, const __tz::__rule& __rule) { |
302 | return chrono::__from_to_sys_seconds(__stdoff, __rule, __rule.__from); |
303 | } |
304 | |
305 | [[nodiscard]] static const vector<__tz::__rule>& |
306 | __get_rules(const __tz::__rules_storage_type& __rules_db, const string& __rule_name) { |
307 | auto __result = chrono::__binary_find(__rules_db, __rule_name, {}, [](const auto& __p) { return __p.first; }); |
308 | if (__result == std::end(__rules_db)) |
309 | std::__throw_runtime_error(("corrupt tzdb: rule '" + __rule_name + " 'does not exist" ).c_str()); |
310 | |
311 | return __result->second; |
312 | } |
313 | |
314 | // Returns the letters field for a time before the first rule. |
315 | // |
316 | // Per https://data.iana.org/time-zones/tz-how-to.html |
317 | // One wrinkle, not fully explained in zic.8.txt, is what happens when switching |
318 | // to a named rule. To what values should the SAVE and LETTER data be |
319 | // initialized? |
320 | // |
321 | // 1 If at least one transition has happened, use the SAVE and LETTER data from |
322 | // the most recent. |
323 | // 2 If switching to a named rule before any transition has happened, assume |
324 | // standard time (SAVE zero), and use the LETTER data from the earliest |
325 | // transition with a SAVE of zero. |
326 | // |
327 | // This function implements case 2. |
328 | [[nodiscard]] static string __letters_before_first_rule(const vector<__tz::__rule>& __rules) { |
329 | auto __letters = |
330 | __rules // |
331 | | views::filter([](const __tz::__rule& __rule) { return __rule.__save.__time == 0s; }) // |
332 | | views::transform([](const __tz::__rule& __rule) { return __rule.__letters; }) // |
333 | | views::take(1); |
334 | |
335 | if (__letters.empty()) |
336 | std::__throw_runtime_error("corrupt tzdb: rule has zero entries" ); |
337 | |
338 | return __letters.front(); |
339 | } |
340 | |
341 | // Determines the information based on the continuation and the rules. |
342 | // |
343 | // There are several special cases to take into account |
344 | // |
345 | // === Entries before the first rule becomes active === |
346 | // Asia/Hong_Kong |
347 | // 9 - JST 1945 N 18 2 // (1) |
348 | // 8 HK HK%sT // (2) |
349 | // R HK 1946 o - Ap 21 0 1 S // (3) |
350 | // There (1) is active until Novemer 18th 1945 at 02:00, after this time |
351 | // (2) becomes active. The first rule entry for HK (3) becomes active |
352 | // from April 21st 1945 at 01:00. In the period between (2) is active. |
353 | // This entry has an offset. |
354 | // This entry has no save, letters, or dst flag. So in the period |
355 | // after (1) and until (3) no rule entry is associated with the time. |
356 | |
357 | [[nodiscard]] static sys_info __get_sys_info_before_first_rule( |
358 | sys_seconds __begin, |
359 | sys_seconds __end, |
360 | const __tz::__continuation& __continuation, |
361 | const vector<__tz::__rule>& __rules) { |
362 | return sys_info{ |
363 | __begin, |
364 | __end, |
365 | __continuation.__stdoff, |
366 | chrono::minutes(0), |
367 | chrono::__format(__continuation, __letters_before_first_rule(__rules), 0s)}; |
368 | } |
369 | |
370 | // Returns the sys_info object for a time before the first rule. |
371 | // When this first rule has a SAVE of 0s the sys_info for the time before the |
372 | // first rule and for the first rule are identical and will be merged. |
373 | [[nodiscard]] static sys_info __get_sys_info_before_first_rule( |
374 | sys_seconds __begin, |
375 | sys_seconds __rule_end, // The end used when SAVE != 0s |
376 | sys_seconds __next_end, // The end used when SAVE == 0s the times are merged |
377 | const __tz::__continuation& __continuation, |
378 | const vector<__tz::__rule>& __rules, |
379 | vector<__tz::__rule>::const_iterator __rule) { |
380 | if (__rule->__save.__time != 0s) |
381 | return __get_sys_info_before_first_rule(__begin, __rule_end, __continuation, __rules); |
382 | |
383 | return sys_info{ |
384 | __begin, __next_end, __continuation.__stdoff, 0min, chrono::__format(__continuation, __rule->__letters, 0s)}; |
385 | } |
386 | |
387 | [[nodiscard]] static seconds __at_to_seconds(seconds __stdoff, seconds __save, const __tz::__rule& __rule) { |
388 | switch (__rule.__at.__clock) { |
389 | case __tz::__clock::__local: |
390 | return __rule.__at.__time - __stdoff - __save; |
391 | |
392 | case __tz::__clock::__universal: |
393 | return __rule.__at.__time; |
394 | |
395 | case __tz::__clock::__standard: |
396 | return __rule.__at.__time - __stdoff; |
397 | } |
398 | std::__libcpp_unreachable(); |
399 | } |
400 | |
401 | [[nodiscard]] static sys_seconds |
402 | __rule_to_sys_seconds(seconds __stdoff, seconds __save, const __tz::__rule& __rule, year __year) { |
403 | year_month_day __ymd = chrono::__to_year_month_day(__year, __rule.__in, __rule.__on); |
404 | |
405 | seconds __at = chrono::__at_to_seconds(__stdoff, __save, __rule); |
406 | return chrono::__to_sys_seconds(__ymd, __at); |
407 | } |
408 | |
// Returns the first rule after __time.
// Note that a rule can be "active" in multiple years, this may result in an
// infinite loop where the same rule is returned every time, use __current to
// guard against that.
//
// When no next rule exists the returned time will be sys_seconds::max(). This
// can happen in practice. For example,
//
//   R So 1945 o - May 24 2 2 M
//   R So 1945 o - S 24 3 1 S
//   R So 1945 o - N 18 2s 0 -
//
// Has 3 rules that are all only active in 1945.
//
// The result is a pair of the time the rule becomes active and an iterator to
// that rule. When there is no next rule the iterator is __rules.end().
//
// __current is dereferenced unless __time is sys_seconds::min() or no
// candidate is found. So __rules.end() should only be used for __current in
// combination with sys_seconds::min() for __time.
[[nodiscard]] static pair<sys_seconds, vector<__tz::__rule>::const_iterator>
__next_rule(sys_seconds __time,
            seconds __stdoff,
            seconds __save,
            const vector<__tz::__rule>& __rules,
            vector<__tz::__rule>::const_iterator __current) {
  year __year = year_month_day{chrono::floor<days>(__time)}.year();

  // Note it would probably be better to store the pairs in a vector and then
  // use min() to get the smallest element
  map<sys_seconds, vector<__tz::__rule>::const_iterator> __candidates;
  // Note this evaluates all rules which is a waste of effort; when the entries
  // are beyond the current year's "next year" (where "next year" is not always
  // year + 1) the algorithm should end.
  for (auto __it = __rules.begin(); __it != __rules.end(); ++__it) {
    // Every rule adds at most one candidate; its first activation after
    // __time.
    for (year __y = __it->__from; __y <= __it->__to; ++__y) {
      // Adding the current entry for the current year may lead to infinite
      // loops due to the SAVE adjustment. Skip these entries.
      if (__y == __year && __it == __current)
        continue;

      sys_seconds __t = chrono::__rule_to_sys_seconds(__stdoff, __save, *__it, __y);
      if (__t <= __time)
        continue;

      _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(!__candidates.contains(__t), "duplicated rule");
      __candidates[__t] = __it;
      break;
    }
  }

  if (!__candidates.empty()) [[likely]] {
    // The map is ordered, so this is the candidate with the earliest time.
    auto __it = __candidates.begin();

    // When no rule is selected the time before the first rule and the first rule
    // should not be merged.
    if (__time == sys_seconds::min())
      return *__it;

    // There can be two consecutive rules that are the same. For example,
    // Hong Kong
    //
    // R HK 1973 o - D 30 3:30 1 S          (R1)
    // R HK 1965 1976 - Ap Su>=16 3:30 1 S  (R2)
    //
    // 1973-12-29 19:30:00 R1 becomes active.
    // 1974-04-20 18:30:00 R2 becomes active.
    // Both rules have a SAVE of 1 hour and LETTERS are S for both of them.
    //
    // Candidates with the same SAVE and LETTERS as __current are skipped.
    while (__it != __candidates.end()) {
      if (__current->__save.__time != __it->second->__save.__time || __current->__letters != __it->second->__letters)
        return *__it;

      ++__it;
    }
  }

  // Either there are no candidates or all candidates have the same SAVE and
  // LETTERS as __current.
  return {sys_seconds::max(), __rules.end()};
}
480 | |
481 | // Returns the first rule of a set of rules. |
482 | // This is not always the first of the listed rules. For example |
483 | // R Sa 2008 2009 - Mar Su>=8 0 0 - |
484 | // R Sa 2007 2008 - O Su>=8 0 1 - |
485 | // The transition in October 2007 happens before the transition in March 2008. |
486 | [[nodiscard]] static vector<__tz::__rule>::const_iterator |
487 | __first_rule(seconds __stdoff, const vector<__tz::__rule>& __rules) { |
488 | return chrono::__next_rule(sys_seconds::min(), __stdoff, 0s, __rules, __rules.end()).second; |
489 | } |
490 | |
// Returns the sys_info for a continuation that uses a named rule.
//
// - __time               the time sought; times before __continuation_begin
//                        are replaced by __continuation_begin,
// - __continuation_begin the time at which __continuation becomes active,
// - __continuation       the continuation to inspect,
// - __rules              the rules of the named rule used by __continuation.
//
// The function starts at the first rule and walks the rules in the order in
// which they become active. All paths in this function return a __sys_info
// object; its __can_merge flag is set when the returned sys_info ends at the
// end of the continuation.
[[nodiscard]] static __sys_info_result __get_sys_info_rule(
    sys_seconds __time,
    sys_seconds __continuation_begin,
    const __tz::__continuation& __continuation,
    const vector<__tz::__rule>& __rules) {
  auto __rule = chrono::__first_rule(__continuation.__stdoff, __rules);
  _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__rule != __rules.end(), "the set of rules has no first rule");

  // Avoid selecting a time before the start of the continuation
  __time = std::max(__time, __continuation_begin);

  sys_seconds __rule_begin = chrono::__from_to_sys_seconds(__continuation.__stdoff, *__rule);

  // The time sought is very likely inside the current rule.
  // When the continuation's UNTIL uses the local clock there are edge cases
  // where this is not true.
  //
  // Start to walk the rules to find the proper one.
  //
  // For now we just walk all the rules TODO TZDB investigate whether a smarter
  // algorithm would work.
  auto __next = chrono::__next_rule(__rule_begin, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);

  // Ignore small steps, this happens with America/Punta_Arenas for the
  // transition
  // -4:42:46 - SMT 1927 S
  // -5 x -05/-04 1932 S
  // ...
  //
  // R x 1927 1931 - S 1 0 1 -
  // R x 1928 1932 - Ap 1 0 0 -
  //
  // America/Punta_Arenas  Thu Sep  1 04:42:45 1927 UT = Thu Sep  1 00:42:45 1927 -04 isdst=1 gmtoff=-14400
  // America/Punta_Arenas  Sun Apr  1 03:59:59 1928 UT = Sat Mar 31 23:59:59 1928 -04 isdst=1 gmtoff=-14400
  // America/Punta_Arenas  Sun Apr  1 04:00:00 1928 UT = Sat Mar 31 23:00:00 1928 -05 isdst=0 gmtoff=-18000
  //
  // Without this there will be a transition
  //   [1927-09-01 04:42:45, 1927-09-01 05:00:00) -05:00:00 0min -05

  // __begin is the time from which the first rule's own sys_info is used. When
  // the first rule has no SAVE this is the start of the next rule.
  if (sys_seconds __begin = __rule->__save.__time != 0s ? __rule_begin : __next.first; __time < __begin) {
    if (__continuation_begin == sys_seconds::min() || __begin - __continuation_begin > 12h)
      return __sys_info{__get_sys_info_before_first_rule(
                            __continuation_begin, __rule_begin, __next.first, __continuation, __rules, __rule),
                        false};

    // Europe/Berlin
    // 1 c CE%sT 1945 May 24 2          (C1)
    // 1 So CE%sT 1946                  (C2)
    //
    // R c 1944 1945 - Ap M>=1 2s 1 S   (R1)
    //
    // R So 1945 o - May 24 2 2 M       (R2)
    //
    // When C2 becomes active the time would be before the first rule R2,
    // giving a 1 hour sys_info.
    seconds __save = __rule->__save.__time;
    __named_rule_until __continuation_end{__continuation};
    sys_seconds __sys_info_end = std::min(__continuation_end(__save), __next.first);

    return __sys_info{
        sys_info{__continuation_begin,
                 __sys_info_end,
                 __continuation.__stdoff + __save,
                 chrono::duration_cast<minutes>(__save),
                 chrono::__format(__continuation, __rule->__letters, __save)},
        __sys_info_end == __continuation_end(__save)};
  }

  // See above for America/Asuncion
  // NOTE(review): no America/Asuncion example is visible above in this
  // function; presumably this refers to the America/Punta_Arenas case - confirm.
  if (__rule->__save.__time == 0s && __time < __next.first) {
    return __sys_info{
        sys_info{__continuation_begin,
                 __next.first,
                 __continuation.__stdoff,
                 0min,
                 chrono::__format(__continuation, __rule->__letters, 0s)},
        false};
  }

  if (__rule->__save.__time != 0s) {
    // another fix for America/Punta_Arenas when not at the start of the
    // sys_info object.
    seconds __save = __rule->__save.__time;
    if (__continuation_begin >= __rule_begin - __save && __time < __next.first) {
      return __sys_info{
          sys_info{__continuation_begin,
                   __next.first,
                   __continuation.__stdoff + __save,
                   chrono::duration_cast<minutes>(__save),
                   chrono::__format(__continuation, __rule->__letters, __save)},
          false};
    }
  }

  // Walk the rules until the rule containing __time is found, the end of the
  // continuation is reached, or there is no next rule.
  __named_rule_until __continuation_end{__continuation};
  while (__next.second != __rules.end()) {
#ifdef PRINT
    std::print(
        stderr,
        "Rule for {}: [{}, {}) off={} save={} duration={}\n",
        __time,
        __rule_begin,
        __next.first,
        __continuation.__stdoff,
        __rule->__save.__time,
        __next.first - __rule_begin);
#endif

    // The end of the continuation adjusted for the SAVE of the current rule.
    sys_seconds __end = __continuation_end(__rule->__save.__time);

    sys_seconds __sys_info_begin = std::max(__continuation_begin, __rule_begin);
    sys_seconds __sys_info_end   = std::min(__end, __next.first);
    seconds __diff               = chrono::abs(__sys_info_end - __sys_info_begin);

    if (__diff < 12h) {
      // Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31
      // -4:16:48 - CMT 1920 May
      // -4 - -04 1930 D
      // -4 A -04/-03 1969 O 5
      // -3 A -03/-02 1999 O 3
      // -4 A -04/-03 2000 Mar 3
      // ...
      //
      // ...
      // R A 1989 1992 - O Su>=15 0 1 -
      // R A 1999 o - O Su>=1 0 1 -
      // R A 2000 o - Mar 3 0 0 -
      // R A 2007 o - D 30 0 1 -
      // ...

      // The 1999 switch uses the same rule, but with a different stdoff.
      //   R A 1999 o - O Su>=1 0 1 -
      //     stdoff -3 -> 1999-10-03 03:00:00
      //     stdoff -4 -> 1999-10-03 04:00:00
      // This generates an invalid entry and this is evaluated as a transition.
      // Looking at the zdump like output in libc++ this generates jumps in
      // the UTC time.

      // Skip the short period by advancing to the next rule. Note that
      // __rule_begin is not updated here.
      __rule         = __next.second;
      __next         = __next_rule(__next.first, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);
      __end          = __continuation_end(__rule->__save.__time);
      __sys_info_end = std::min(__end, __next.first);
    }

    // Done when __time is in the current rule or the next rule starts at or
    // after the end of the continuation.
    if ((__time >= __rule_begin && __time < __next.first) || __next.first >= __end) {
      __sys_info_begin = std::max(__continuation_begin, __rule_begin);
      __sys_info_end   = std::min(__end, __next.first);

      return __sys_info{
          sys_info{__sys_info_begin,
                   __sys_info_end,
                   __continuation.__stdoff + __rule->__save.__time,
                   chrono::duration_cast<minutes>(__rule->__save.__time),
                   chrono::__format(__continuation, __rule->__letters, __rule->__save.__time)},
          __sys_info_end == __end};
    }

    __rule_begin = __next.first;
    __rule       = __next.second;
    __next       = __next_rule(__rule_begin, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);
  }

  // There is no next rule, so the current rule is used until the end of the
  // continuation.
  return __sys_info{
      sys_info{std::max(__continuation_begin, __rule_begin),
               __continuation_end(__rule->__save.__time),
               __continuation.__stdoff + __rule->__save.__time,
               chrono::duration_cast<minutes>(__rule->__save.__time),
               chrono::__format(__continuation, __rule->__letters, __rule->__save.__time)},
      true};
}
661 | |
// Returns the sys_info for a continuation that does not use a named rule.
//
// The returned object covers the entire continuation, so __time is not used.
// The FORMAT field of the continuation is used as abbreviation without further
// processing. The result can always be merged with the next result.
[[nodiscard]] static __sys_info_result __get_sys_info_basic(
    sys_seconds __time, sys_seconds __continuation_begin, const __tz::__continuation& __continuation, seconds __save) {
  sys_info __info{
      __continuation_begin,
      chrono::__until_to_sys_seconds(__continuation),
      __continuation.__stdoff + __save,
      chrono::duration_cast<minutes>(__save),
      __continuation.__format};

  return __sys_info{std::move(__info), true};
}
673 | |
// Returns the sys_info for __time in __continuation.
//
// The RULES field of the continuation determines how the result is obtained:
// - no rule: __get_sys_info_basic without a SAVE,
// - a SAVE: __get_sys_info_basic with that SAVE,
// - the name of a rule: __get_sys_info_rule with the rules stored under that
//   name in __rules_db.
[[nodiscard]] static __sys_info_result
__get_sys_info(sys_seconds __time,
               sys_seconds __continuation_begin,
               const __tz::__continuation& __continuation,
               const __tz::__rules_storage_type& __rules_db) {
  auto __dispatch = [&](const auto& __value) {
    using _Tp = decay_t<decltype(__value)>;
    if constexpr (same_as<_Tp, monostate>)
      return chrono::__get_sys_info_basic(__time, __continuation_begin, __continuation, chrono::seconds(0));
    else if constexpr (same_as<_Tp, __tz::__save>)
      return chrono::__get_sys_info_basic(__time, __continuation_begin, __continuation, __value.__time);
    else if constexpr (same_as<_Tp, std::string>)
      return chrono::__get_sys_info_rule(
          __time, __continuation_begin, __continuation, __get_rules(__rules_db, __value));
    else
      static_assert(sizeof(_Tp) == 0); // TODO TZDB static_assert(false); after dropping clang-16 support

    std::__libcpp_unreachable();
  };

  return std::visit(__dispatch, __continuation.__rules);
}
696 | |
697 | // The transition from one continuation to the next continuation may result in |
698 | // two constitutive continuations with the same "offset" information. |
699 | // [time.zone.info.sys]/3 |
700 | // The begin and end data members indicate that, for the associated time_zone |
701 | // and time_point, the offset and abbrev are in effect in the range |
702 | // [begin, end). This information can be used to efficiently iterate the |
703 | // transitions of a time_zone. |
704 | // |
705 | // Note that this does considers a change in the SAVE field not to be a |
706 | // different sys_info, zdump does consider this different. |
707 | // LWG XXXX The sys_info range should be affected by save |
708 | // matches the behaviour of the Standard and zdump. |
709 | // |
710 | // Iff the "offsets" are the same '__current.__end' is replaced with |
711 | // '__next.__end', which effectively merges the two objects in one object. The |
712 | // function returns true if a merge occurred. |
713 | [[nodiscard]] bool __merge_continuation(sys_info& __current, const sys_info& __next) { |
714 | if (__current.end != __next.begin) |
715 | return false; |
716 | |
717 | if (__current.offset != __next.offset || __current.abbrev != __next.abbrev || __current.save != __next.save) |
718 | return false; |
719 | |
720 | __current.end = __next.end; |
721 | return true; |
722 | } |
723 | |
724 | //===----------------------------------------------------------------------===// |
725 | // Public API |
726 | //===----------------------------------------------------------------------===// |
727 | |
728 | [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI time_zone time_zone::__create(unique_ptr<time_zone::__impl>&& __p) { |
729 | _LIBCPP_ASSERT_NON_NULL(__p != nullptr, "initialized time_zone without a valid pimpl object" ); |
730 | time_zone result; |
731 | result.__impl_ = std::move(__p); |
732 | return result; |
733 | } |
734 | |
735 | _LIBCPP_EXPORTED_FROM_ABI time_zone::~time_zone() = default; |
736 | |
737 | [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI string_view time_zone::__name() const noexcept { return __impl_->__name(); } |
738 | |
739 | [[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI sys_info |
740 | time_zone::__get_info(sys_seconds __time) const { |
741 | optional<sys_info> __result; |
742 | bool __valid_result = false; // true iff __result.has_value() is true and |
743 | // __result.begin <= __time < __result.end is true. |
744 | bool __can_merge = false; |
745 | sys_seconds __continuation_begin = sys_seconds::min(); |
746 | // Iterates over the Zone entry and its continuations. Internally the Zone |
747 | // entry is split in a Zone information and the first continuation. The last |
748 | // continuation has no UNTIL field. This means the loop should always find a |
749 | // continuation. |
750 | // |
751 | // For more information on background of zone information please consult the |
752 | // following information |
753 | // [zic manual](https://www.man7.org/linux/man-pages/man8/zic.8.html) |
754 | // [tz source info](https://data.iana.org/time-zones/tz-how-to.html) |
755 | // On POSIX systems the zdump tool can be useful: |
756 | // zdump -v Asia/Hong_Kong |
757 | // Gives all transitions in the Hong Kong time zone. |
758 | // |
759 | // During iteration the result for the current continuation is returned. If |
760 | // no continuation is applicable it will return the end time as "error". When |
761 | // two continuations are contiguous and contain the "same" information these |
762 | // ranges are merged as one range. |
763 | // The merging requires keeping any result that occurs before __time, |
764 | // likewise when a valid result is found the algorithm needs to test the next |
765 | // continuation to see whether it can be merged. For example, Africa/Ceuta |
766 | // Continuations |
767 | // 0 s WE%sT 1929 (C1) |
768 | // 0 - WET 1967 (C2) |
769 | // 0 Sp WE%sT 1984 Mar 16 (C3) |
770 | // |
771 | // Rules |
772 | // R s 1926 1929 - O Sa>=1 24s 0 - (R1) |
773 | // |
774 | // R Sp 1967 o - Jun 3 12 1 S (R2) |
775 | // |
776 | // The rule R1 is the last rule used in C1. The rule R2 is the first rule in |
777 | // C3. Since R2 is the first rule this means when a continuation uses this |
778 | // rule its value prior to R2 will be SAVE 0 LETTERS of the first entry with a |
779 | // SAVE of 0, in this case WET. |
780 | // This gives the following changes in the information. |
781 | // 1928-10-07 00:00:00 C1 R1 becomes active: offset 0 save 0 abbrev WET |
782 | // 1929-01-01 00:00:00 C2 becomes active: offset 0 save 0 abbrev WET |
783 | // 1967-01-01 00:00:00 C3 becomes active: offset 0 save 0 abbrev WET |
784 | // 1967-06-03 12:00:00 C3 R2 becomes active: offset 0 save 1 abbrev WEST |
785 | // |
786 | // The first 3 entries are contiguous and contain the same information, this |
787 | // means the period [1928-10-07 00:00:00, 1967-06-03 12:00:00) should be |
788 | // returned in one sys_info object. |
789 | |
790 | const auto& __continuations = __impl_->__continuations(); |
791 | const __tz::__rules_storage_type& __rules_db = __impl_->__rules_db(); |
792 | for (auto __it = __continuations.begin(); __it != __continuations.end(); ++__it) { |
793 | const auto& __continuation = *__it; |
794 | __sys_info_result __sys_info = chrono::__get_sys_info(__time, __continuation_begin, __continuation, __rules_db); |
795 | |
796 | if (__sys_info) { |
797 | _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( |
798 | __sys_info->__info.begin < __sys_info->__info.end, "invalid sys_info range" ); |
799 | |
800 | // Filters out dummy entries |
801 | // Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31 |
802 | // ... |
803 | // -4 A -04/-03 2000 Mar 3 (C1) |
804 | // -3 A -03/-02 (C2) |
805 | // |
806 | // ... |
807 | // R A 2000 o - Mar 3 0 0 - |
808 | // R A 2007 o - D 30 0 1 - |
809 | // ... |
810 | // |
811 | // This results in an entry |
812 | // [2000-03-03 03:00:00, 2000-03-03 04:00:00) -10800s 60min -03 |
813 | // for [C1 & R1, C1, R2) which due to the end of the continuation is an |
814 | // one hour "sys_info". Instead the entry should be ignored and replaced |
815 | // by [C2 & R1, C2 & R2) which is the proper range |
816 | // "[2000-03-03 03:00:00, 2007-12-30 03:00:00) -02:00:00 60min -02 |
817 | |
818 | if (std::holds_alternative<string>(__continuation.__rules) && __sys_info->__can_merge && |
819 | __sys_info->__info.begin + 12h > __sys_info->__info.end) { |
820 | __continuation_begin = __sys_info->__info.begin; |
821 | continue; |
822 | } |
823 | |
824 | if (!__result) { |
825 | // First entry found, always keep it. |
826 | __result = __sys_info->__info; |
827 | |
828 | __valid_result = __time >= __result->begin && __time < __result->end; |
829 | __can_merge = __sys_info->__can_merge; |
830 | } else if (__can_merge && chrono::__merge_continuation(*__result, __sys_info->__info)) { |
831 | // The results are merged, update the result state. This may |
832 | // "overwrite" a valid sys_info object with another valid sys_info |
833 | // object. |
834 | __valid_result = __time >= __result->begin && __time < __result->end; |
835 | __can_merge = __sys_info->__can_merge; |
836 | } else { |
837 | // Here things get interesting: |
838 | // For example, America/Argentina/San_Luis |
839 | // |
840 | // -3 A -03/-02 2008 Ja 21 (C1) |
841 | // -4 Sa -04/-03 2009 O 11 (C2) |
842 | // |
843 | // R A 2007 o - D 30 0 1 - (R1) |
844 | // |
845 | // R Sa 2007 2008 - O Su>=8 0 1 - (R2) |
846 | // |
847 | // Based on C1 & R1 the end time of C1 is 2008-01-21 03:00:00 |
848 | // Based on C2 & R2 the end time of C1 is 2008-01-21 02:00:00 |
849 | // In this case the earlier time is the real time of the transition. |
850 | // However the algorithm used gives 2008-01-21 03:00:00. |
851 | // |
852 | // So we need to calculate the previous UNTIL in the current context and |
853 | // see whether it's earlier. |
854 | |
855 | // The results could not be merged. |
856 | // - When we have a valid result that result is the final result. |
857 | // - Otherwise the result we had is before __time and the result we got |
858 | // is at a later time (possibly valid). This result is always better |
859 | // than the previous result. |
860 | if (__valid_result) { |
861 | return *__result; |
862 | } else { |
863 | _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( |
864 | __it != __continuations.begin(), "the first rule should always seed the result" ); |
865 | const auto& __last = *(__it - 1); |
866 | if (std::holds_alternative<string>(__last.__rules)) { |
867 | // Europe/Berlin |
868 | // 1 c CE%sT 1945 May 24 2 (C1) |
869 | // 1 So CE%sT 1946 (C2) |
870 | // |
871 | // R c 1944 1945 - Ap M>=1 2s 1 S (R1) |
872 | // |
873 | // R So 1945 o - May 24 2 2 M (R2) |
874 | // |
875 | // When C2 becomes active the time would be before the first rule R2, |
876 | // giving a 1 hour sys_info. This is not valid and the results need |
877 | // merging. |
878 | |
879 | if (__result->end != __sys_info->__info.begin) { |
880 | // When the UTC gap between the rules is due to the change of |
881 | // offsets adjust the new time to remove the gap. |
882 | sys_seconds __end = __result->end - __result->offset; |
883 | sys_seconds __begin = __sys_info->__info.begin - __sys_info->__info.offset; |
884 | if (__end == __begin) { |
885 | __sys_info->__info.begin = __result->end; |
886 | } |
887 | } |
888 | } |
889 | |
890 | __result = __sys_info->__info; |
891 | __valid_result = __time >= __result->begin && __time < __result->end; |
892 | __can_merge = __sys_info->__can_merge; |
893 | } |
894 | } |
895 | __continuation_begin = __result->end; |
896 | } else { |
897 | __continuation_begin = __sys_info.error(); |
898 | } |
899 | } |
900 | if (__valid_result) |
901 | return *__result; |
902 | |
903 | std::__throw_runtime_error("tzdb: corrupt db" ); |
904 | } |
905 | |
906 | } // namespace chrono |
907 | |
908 | _LIBCPP_END_NAMESPACE_STD |
909 | |