9d8f989f0211953425344f23e07565e2c8263637
[feed/packages.git] / lang / php7 / patches / 0013-Add-support-for-use-of-the-system-timezone-database.patch
1 From: Joe Orton <jorton@redhat.com>
2 Date: Sun, 18 Oct 2015 02:15:17 +0200
3 Subject: Add support for use of the system timezone database
4
5 Add support for use of the system timezone database, rather
6 than embedding a copy. Discussed upstream but was not desired.
7
8 History:
9 r13: adapt for upstream changes to use PHP allocator
10 r12: adapt for upstream changes for new zic
11 r11: use canonical names to avoid more case sensitivity issues
12 round lat/long from zone.tab towards zero per builtin db
13 r10: make timezone case insensitive
14 r9: fix another compile error without --with-system-tzdata configured (Michael Heimpold)
15 r8: fix compile error without --with-system-tzdata configured
16 r7: improve check for valid timezone id to exclude directories
17 r6: fix fd leak in r5, fix country code/BC flag use in
18 timezone_identifiers_list() using system db,
19 fix use of PECL timezonedb to override system db,
20 r5: reverts addition of "System/Localtime" fake tzname.
21 updated for 5.3.0, parses zone.tab to pick up mapping between
22 timezone name, country code and long/lat coords
23 r4: added "System/Localtime" tzname which uses /etc/localtime
24 r3: fix a crash if /usr/share/zoneinfo doesn't exist (Raphael Geissert)
25 r2: add filesystem trawl to set up name alias index
26 r1: initial revision
27 ---
28 ext/date/lib/parse_tz.c | 549 +++++++++++++++++++++++++++++++++++++++++++++++-
29 ext/date/lib/timelib.m4 | 14 ++
30 2 files changed, 552 insertions(+), 11 deletions(-)
31
32 diff --git a/ext/date/lib/parse_tz.c b/ext/date/lib/parse_tz.c
33 index 20d7eea..6301dc5 100644
34 --- a/ext/date/lib/parse_tz.c
35 +++ b/ext/date/lib/parse_tz.c
36 @@ -24,6 +24,16 @@
37
38 #include "timelib.h"
39
40 +#ifdef HAVE_SYSTEM_TZDATA
41 +#include <sys/mman.h>
42 +#include <sys/stat.h>
43 +#include <limits.h>
44 +#include <fcntl.h>
45 +#include <unistd.h>
46 +
47 +#include "php_scandir.h"
48 +#endif
49 +
50 #include <stdio.h>
51
52 #ifdef HAVE_LOCALE_H
53 @@ -36,8 +46,12 @@
54 #include <strings.h>
55 #endif
56
57 +#ifndef HAVE_SYSTEM_TZDATA
58 #define TIMELIB_SUPPORTS_V2DATA
59 #include "timezonedb.h"
60 +#endif
61 +
62 +#include <ctype.h>
63
64 #if (defined(__APPLE__) || defined(__APPLE_CC__)) && (defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__))
65 # if defined(__LITTLE_ENDIAN__)
66 @@ -59,6 +73,11 @@ static int read_preamble(const unsigned char **tzf, timelib_tzinfo *tz)
67 {
68 uint32_t version;
69
70 + if (memcmp(*tzf, "TZif", 4) == 0) {
71 + *tzf += 20;
72 + return 0;
73 + }
74 +
75 /* read ID */
76 version = (*tzf)[3] - '0';
77 *tzf += 4;
78 @@ -302,7 +321,418 @@ void timelib_dump_tzinfo(timelib_tzinfo *tz)
79 }
80 }
81
82 -static int seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
83 +#ifdef HAVE_SYSTEM_TZDATA
84 +
85 +#ifdef HAVE_SYSTEM_TZDATA_PREFIX
86 +#define ZONEINFO_PREFIX HAVE_SYSTEM_TZDATA_PREFIX
87 +#else
88 +#define ZONEINFO_PREFIX "/usr/share/zoneinfo"
89 +#endif
90 +
91 +/* System timezone database pointer. */
92 +static const timelib_tzdb *timezonedb_system;
93 +
94 +/* Hash table entry for the cache of the zone.tab mapping table. */
95 +struct location_info {
96 + char code[2];
97 + double latitude, longitude;
98 + char name[64];
99 + char *comment;
100 + struct location_info *next;
101 +};
102 +
103 +/* Cache of zone.tab. */
104 +static struct location_info **system_location_table;
105 +
106 +/* Size of the zone.tab hash table; a random-ish prime big enough to
107 + * prevent too many collisions. */
108 +#define LOCINFO_HASH_SIZE (1021)
109 +
110 +/* Case-insensitive hash of str, reduced modulo LOCINFO_HASH_SIZE. */
111 +static uint32_t tz_hash(const char *str)
112 +{
113 + const unsigned char *p = (const unsigned char *)str;
114 + uint32_t hash = 5381;
115 + int c;
116 +
117 + while ((c = tolower(*p++)) != '\0') {
118 + hash = (hash << 5) ^ hash ^ c;
119 + }
120 +
121 + return hash % LOCINFO_HASH_SIZE;
122 +}
123 +
124 +/* Parse an ISO-6709 coordinate as used in zone.tab. Returns end of
125 + * the parsed string on success, or NULL on parse error. On success,
126 + * writes the parsed value, in signed decimal degrees, to *result. */
127 +static char *parse_iso6709(char *p, double *result)
128 +{
129 + double v, sign;
130 + char *pend;
131 + size_t len;
132 +
133 + if (*p == '+')
134 + sign = 1.0;
135 + else if (*p == '-')
136 + sign = -1.0;
137 + else
138 + return NULL;
139 +
140 + p++;
141 + for (pend = p; *pend >= '0' && *pend <= '9'; pend++)
142 + ;;
143 +
144 + /* Annoying encoding used by zone.tab has no decimal point, so use
145 + * the number of digits to determine the format:
146 + *
147 + * 4 = DDMM
148 + * 5 = DDDMM
149 + * 6 = DDMMSS
150 + * 7 = DDDMMSS
151 + */
152 + len = pend - p;
153 + if (len < 4 || len > 7) {
154 + return NULL;
155 + }
156 +
157 + /* p => [D]DD */
158 + v = (p[0] - '0') * 10.0 + (p[1] - '0');
159 + p += 2;
160 + if (len == 5 || len == 7)
161 + v = v * 10.0 + (*p++ - '0');
162 + /* p => MM[SS] */
163 + v += (10.0 * (p[0] - '0')
164 + + p[1] - '0') / 60.0;
165 + p += 2;
166 + /* p => [SS] */
167 + if (len > 5) {
168 + v += (10.0 * (p[0] - '0')
169 + + p[1] - '0') / 3600.0;
170 + p += 2;
171 + }
172 +
173 + /* Truncate towards zero to five decimal places, not because it's
174 + * a good idea, but because the builtin data uses values rounded
175 + * that way, so match that. */
176 + *result = trunc(v * sign * 100000.0) / 100000.0;
177 +
178 + return p;
179 +}
180 +
181 +/* Parse zone.tab to build the mapping of timezone name to country
182 + * code and geographic location. Returns a hash table indexed by
183 + * tz_hash(timezone-name), or NULL if zone.tab cannot be opened or
184 + * the table cannot be allocated. Names longer than the name field
185 + * are truncated, but always NUL-terminated.
186 + */
187 +static struct location_info **create_location_table(void)
188 +{
189 + struct location_info **li, *i;
190 + char zone_tab[PATH_MAX];
191 + char line[512];
192 + FILE *fp;
193 +
194 + snprintf(zone_tab, sizeof zone_tab, "%s", ZONEINFO_PREFIX "/zone.tab");
195 +
196 + fp = fopen(zone_tab, "r");
197 + if (!fp) {
198 + return NULL;
199 + }
200 +
201 + li = calloc(LOCINFO_HASH_SIZE, sizeof *li);
202 + if (!li) { fclose(fp); return NULL; }
203 + while (fgets(line, sizeof line, fp)) {
204 + char *p = line, *code, *name, *comment;
205 + uint32_t hash;
206 + double latitude, longitude;
207 +
208 + while (isspace((unsigned char)*p))
209 + p++;
210 +
211 + if (*p == '#' || *p == '\0' || *p == '\n')
212 + continue;
213 +
214 + if (!isalpha((unsigned char)p[0]) || !isalpha((unsigned char)p[1]) || p[2] != '\t')
215 + continue;
216 +
217 + /* code => AA */
218 + code = p;
219 + p[2] = 0;
220 + p += 3;
221 +
222 + /* coords => [+-][D]DDMM[SS][+-][D]DDMM[SS] */
223 + p = parse_iso6709(p, &latitude);
224 + if (!p) {
225 + continue;
226 + }
227 + p = parse_iso6709(p, &longitude);
228 + if (!p) {
229 + continue;
230 + }
231 +
232 + if (!p || *p != '\t') {
233 + continue;
234 + }
235 +
236 + /* name = string */
237 + name = ++p;
238 + while (*p != '\t' && *p && *p != '\n')
239 + p++;
240 +
241 + if (*p) *p++ = '\0'; /* never step past the end of the line buffer */
242 +
243 + /* comment = string */
244 + comment = p;
245 + while (*p != '\t' && *p && *p != '\n')
246 + p++;
247 +
248 + if (*p == '\n' || *p == '\t')
249 + *p = '\0';
250 +
251 + hash = tz_hash(name);
252 + if ((i = malloc(sizeof *i)) == NULL) break; /* keep what was parsed so far */
253 + memcpy(i->code, code, 2);
254 + snprintf(i->name, sizeof i->name, "%s", name);
255 + i->comment = strdup(comment);
256 + i->longitude = longitude;
257 + i->latitude = latitude;
258 + i->next = li[hash];
259 + li[hash] = i;
260 + /* The newest entry becomes the head of its hash chain. */
261 + }
262 +
263 + fclose(fp);
264 +
265 + return li;
266 +}
267 +
268 +/* Return location info from hash table, using given timezone name.
269 + * Returns NULL if the name could not be found. */
270 +const struct location_info *find_zone_info(struct location_info **li,
271 + const char *name)
272 +{
273 + uint32_t hash = tz_hash(name);
274 + const struct location_info *l;
275 +
276 + if (!li) {
277 + return NULL;
278 + }
279 +
280 + for (l = li[hash]; l; l = l->next) {
281 + if (strcasecmp(l->name, name) == 0)
282 + return l;
283 + }
284 +
285 + return NULL;
286 +}
287 +
288 +/* Filter out some non-tzdata files and the posix/right databases, if
289 + * present. */
290 +static int index_filter(const struct dirent *ent)
291 +{
292 + return strcmp(ent->d_name, ".") != 0
293 + && strcmp(ent->d_name, "..") != 0
294 + && strcmp(ent->d_name, "posix") != 0
295 + && strcmp(ent->d_name, "posixrules") != 0
296 + && strcmp(ent->d_name, "right") != 0
297 + && strstr(ent->d_name, ".tab") == NULL;
298 +}
299 +
300 +static int sysdbcmp(const void *first, const void *second)
301 +{
302 + const timelib_tzdb_index_entry *alpha = first, *beta = second;
303 +
304 + return strcasecmp(alpha->id, beta->id);
305 +}
306 +
307 +
308 +/* Create the zone identifier index by trawling the filesystem. */
309 +static void create_zone_index(timelib_tzdb *db)
310 +{
311 + size_t dirstack_size, dirstack_top;
312 + size_t index_size, index_next;
313 + timelib_tzdb_index_entry *db_index;
314 + char **dirstack;
315 +
316 + /* LIFO stack to hold directory entries to scan; each slot is a
317 + * directory name relative to the zoneinfo prefix. */
318 + dirstack_size = 32;
319 + dirstack = malloc(dirstack_size * sizeof *dirstack);
320 + dirstack_top = 1;
321 + dirstack[0] = strdup("");
322 +
323 + /* Index array. */
324 + index_size = 64;
325 + db_index = malloc(index_size * sizeof *db_index);
326 + index_next = 0;
327 +
328 + do {
329 + struct dirent **ents;
330 + char name[PATH_MAX], *top;
331 + int count;
332 +
333 + /* Pop the top stack entry, and iterate through its contents. */
334 + top = dirstack[--dirstack_top];
335 + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s", top);
336 +
337 + count = php_scandir(name, &ents, index_filter, php_alphasort);
338 +
339 + while (count > 0) {
340 + struct stat st;
341 + const char *leaf = ents[count - 1]->d_name;
342 +
343 + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s/%s",
344 + top, leaf);
345 +
346 + if (strlen(name) && stat(name, &st) == 0) {
347 + /* Name, relative to the zoneinfo prefix. */
348 + const char *root = top;
349 +
350 + if (root[0] == '/') root++;
351 +
352 + snprintf(name, sizeof name, "%s%s%s", root,
353 + *root ? "/": "", leaf);
354 +
355 + if (S_ISDIR(st.st_mode)) {
356 + if (dirstack_top == dirstack_size) {
357 + dirstack_size *= 2;
358 + dirstack = realloc(dirstack,
359 + dirstack_size * sizeof *dirstack);
360 + }
361 + dirstack[dirstack_top++] = strdup(name);
362 + }
363 + else {
364 + if (index_next == index_size) {
365 + index_size *= 2;
366 + db_index = realloc(db_index,
367 + index_size * sizeof *db_index);
368 + }
369 +
370 + db_index[index_next++].id = strdup(name);
371 + }
372 + }
373 +
374 + free(ents[--count]);
375 + }
376 +
377 + if (count != -1) free(ents);
378 + free(top);
379 + } while (dirstack_top);
380 +
381 + qsort(db_index, index_next, sizeof *db_index, sysdbcmp);
382 +
383 + db->index = db_index;
384 + db->index_size = index_next;
385 +
386 + free(dirstack);
387 +}
388 +
389 +#define FAKE_HEADER "1234\0??\1??"
390 +#define FAKE_UTC_POS (7 - 4)
391 +
392 +/* Create a fake data segment for 'sysdb': FAKE_HEADER, then 3 bytes (BC flag + country code) per zone found in 'info'. */
393 +static void fake_data_segment(timelib_tzdb *sysdb,
394 + struct location_info **info)
395 +{
396 + size_t n;
397 + char *data, *p;
398 +
399 + data = malloc(3 * sysdb->index_size + sizeof(FAKE_HEADER) - 1);
400 +
401 + p = mempcpy(data, FAKE_HEADER, sizeof(FAKE_HEADER) - 1);
402 +
403 + for (n = 0; n < sysdb->index_size; n++) {
404 + const struct location_info *li;
405 + timelib_tzdb_index_entry *ent;
406 +
407 + ent = (timelib_tzdb_index_entry *)&sysdb->index[n];
408 +
409 + /* UTC is special-cased to a fixed record inside the header. */
410 + if (strcmp(ent->id, "UTC") == 0) {
411 + ent->pos = FAKE_UTC_POS;
412 + continue;
413 + }
414 +
415 + li = find_zone_info(info, ent->id);
416 + if (li) {
417 + /* If found, append the BC byte and the
418 + * country code; set the position for this
419 + * section of timezone data. */
420 + ent->pos = (p - data) - 4;
421 + *p++ = '\1';
422 + *p++ = li->code[0];
423 + *p++ = li->code[1];
424 + }
425 + else {
426 + /* If not found, the timezone data can
427 + * point at the header. */
428 + ent->pos = 0;
429 + }
430 + }
431 +
432 + sysdb->data = (unsigned char *)data;
433 +}
434 +
435 +/* Returns true if the passed-in stat structure describes a
436 + * probably-valid timezone file: a regular file over 20 bytes. */
437 +static int is_valid_tzfile(const struct stat *st)
438 +{
439 + return S_ISREG(st->st_mode) && st->st_size > 20;
440 +}
441 +
442 +/* To allow timezone names to be used case-insensitively, find the
443 + * canonical name for this timezone, if possible. */
444 +static const char *canonical_tzname(const char *timezone)
445 +{
446 + if (timezonedb_system) {
447 + timelib_tzdb_index_entry *ent, lookup;
448 +
449 + lookup.id = (char *)timezone;
450 +
451 + ent = bsearch(&lookup, timezonedb_system->index,
452 + timezonedb_system->index_size, sizeof lookup,
453 + sysdbcmp);
454 + if (ent) {
455 + return ent->id;
456 + }
457 + }
458 +
459 + return timezone;
460 +}
461 +
462 +/* Return the mmap()ed tzfile if found, else NULL. On success, the
463 + * length of the mapped data is placed in *length. */
464 +static char *map_tzfile(const char *timezone, size_t *length)
465 +{
466 + char fname[PATH_MAX];
467 + struct stat st;
468 + char *p;
469 + int fd;
470 +
471 + if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) {
472 + return NULL;
473 + }
474 +
475 + snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", canonical_tzname(timezone));
476 +
477 + fd = open(fname, O_RDONLY);
478 + if (fd == -1) {
479 + return NULL;
480 + } else if (fstat(fd, &st) != 0 || !is_valid_tzfile(&st)) {
481 + close(fd);
482 + return NULL;
483 + }
484 +
485 + *length = st.st_size;
486 + p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
487 + close(fd);
488 +
489 + return p != MAP_FAILED ? p : NULL;
490 +}
491 +
492 +#endif
493 +
494 +static int inmem_seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
495 {
496 int left = 0, right = tzdb->index_size - 1;
497 #ifdef HAVE_SETLOCALE
498 @@ -341,21 +771,88 @@ static int seek_to_tz_position(const unsigned char **tzf, char *timezone, const
499 return 0;
500 }
501
502 +static int seek_to_tz_position(const unsigned char **tzf, char *timezone,
503 + char **map, size_t *maplen,
504 + const timelib_tzdb *tzdb)
505 +{
506 +#ifdef HAVE_SYSTEM_TZDATA
507 + if (tzdb == timezonedb_system) {
508 + char *orig;
509 +
510 + orig = map_tzfile(timezone, maplen);
511 + if (orig == NULL) {
512 + return 0;
513 + }
514 +
515 + (*tzf) = (unsigned char *)orig;
516 + *map = orig;
517 + return 1;
518 + }
519 + else
520 +#endif
521 + {
522 + return inmem_seek_to_tz_position(tzf, timezone, tzdb);
523 + }
524 +}
525 +
526 const timelib_tzdb *timelib_builtin_db(void)
527 {
528 +#ifdef HAVE_SYSTEM_TZDATA
529 + if (timezonedb_system == NULL) {
530 + timelib_tzdb *tmp = malloc(sizeof *tmp);
531 +
532 + tmp->version = "0.system";
533 + tmp->data = NULL;
534 + create_zone_index(tmp);
535 + system_location_table = create_location_table();
536 + fake_data_segment(tmp, system_location_table);
537 + timezonedb_system = tmp;
538 + }
539 +
540 + return timezonedb_system;
541 +#else
542 return &timezonedb_builtin;
543 +#endif
544 }
545
546 const timelib_tzdb_index_entry *timelib_timezone_builtin_identifiers_list(int *count)
547 {
548 +#ifdef HAVE_SYSTEM_TZDATA
549 + *count = timelib_builtin_db()->index_size; /* builds the system db on first use */
550 + return timelib_builtin_db()->index; /* avoids a NULL timezonedb_system */
551 +#else
552 *count = sizeof(timezonedb_idx_builtin) / sizeof(*timezonedb_idx_builtin);
553 return timezonedb_idx_builtin;
554 +#endif
555 }
556
557 int timelib_timezone_id_is_valid(char *timezone, const timelib_tzdb *tzdb)
558 {
559 const unsigned char *tzf;
560 - return (seek_to_tz_position(&tzf, timezone, tzdb));
561 +
562 +#ifdef HAVE_SYSTEM_TZDATA
563 + if (tzdb == timezonedb_system) {
564 + char fname[PATH_MAX];
565 + struct stat st;
566 +
567 + if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) {
568 + return 0;
569 + }
570 +
571 + if (system_location_table) {
572 + if (find_zone_info(system_location_table, timezone) != NULL) {
573 + /* found in cache */
574 + return 1;
575 + }
576 + }
577 +
578 + snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", canonical_tzname(timezone));
579 +
580 + return stat(fname, &st) == 0 && is_valid_tzfile(&st);
581 + }
582 +#endif
583 +
584 + return (inmem_seek_to_tz_position(&tzf, timezone, tzdb));
585 }
586
587 static void skip_64bit_preamble(const unsigned char **tzf, timelib_tzinfo *tz)
588 @@ -380,24 +877,54 @@ static void read_64bit_header(const unsigned char **tzf, timelib_tzinfo *tz)
589 timelib_tzinfo *timelib_parse_tzfile(char *timezone, const timelib_tzdb *tzdb)
590 {
591 const unsigned char *tzf;
592 + char *memmap = NULL;
593 + size_t maplen;
594 timelib_tzinfo *tmp;
595 int version;
596
597 - if (seek_to_tz_position(&tzf, timezone, tzdb)) {
598 + if (seek_to_tz_position(&tzf, timezone, &memmap, &maplen, tzdb)) {
599 tmp = timelib_tzinfo_ctor(timezone);
600
601 version = read_preamble(&tzf, tmp);
602 read_header(&tzf, tmp);
603 read_transistions(&tzf, tmp);
604 read_types(&tzf, tmp);
605 - if (version == 2) {
606 - skip_64bit_preamble(&tzf, tmp);
607 - read_64bit_header(&tzf, tmp);
608 - skip_64bit_transistions(&tzf, tmp);
609 - skip_64bit_types(&tzf, tmp);
610 - skip_posix_string(&tzf, tmp);
611 - }
612 - read_location(&tzf, tmp);
613 +
614 +#ifdef HAVE_SYSTEM_TZDATA
615 + if (memmap) {
616 + const struct location_info *li;
617 +
618 + /* TZif-style - grok the location info from the system database,
619 + * if possible. */
620 +
621 + if ((li = find_zone_info(system_location_table, timezone)) != NULL) {
622 + tmp->location.comments = timelib_strdup(li->comment);
623 + strncpy(tmp->location.country_code, li->code, 2);
624 + tmp->location.longitude = li->longitude;
625 + tmp->location.latitude = li->latitude;
626 + tmp->bc = 1;
627 + }
628 + else {
629 + strcpy(tmp->location.country_code, "??");
630 + tmp->bc = 0;
631 + tmp->location.comments = timelib_strdup("");
632 + }
633 +
634 + /* Now done with the mmap segment - discard it. */
635 + munmap(memmap, maplen);
636 + } else
637 +#endif
638 + {
639 + /* PHP-style - use the embedded info. */
640 + if (version == 2) {
641 + skip_64bit_preamble(&tzf, tmp);
642 + read_64bit_header(&tzf, tmp);
643 + skip_64bit_transistions(&tzf, tmp);
644 + skip_64bit_types(&tzf, tmp);
645 + skip_posix_string(&tzf, tmp);
646 + }
647 + read_location(&tzf, tmp);
648 + }
649 } else {
650 tmp = NULL;
651 }
652 diff --git a/ext/date/lib/timelib.m4 b/ext/date/lib/timelib.m4
653 index c725572..4c837c7 100644
654 --- a/ext/date/lib/timelib.m4
655 +++ b/ext/date/lib/timelib.m4
656 @@ -78,3 +78,17 @@ stdlib.h
657
658 dnl Check for strtoll, atoll
659 AC_CHECK_FUNCS(strtoll atoll strftime)
660 +
661 +PHP_ARG_WITH(system-tzdata, for use of system timezone data,
662 +[ --with-system-tzdata[=DIR] to specify use of system timezone data],
663 +no, no)
664 +
665 +if test "$PHP_SYSTEM_TZDATA" != "no"; then
666 + AC_DEFINE(HAVE_SYSTEM_TZDATA, 1, [Define if system timezone data is used])
667 +
668 + if test "$PHP_SYSTEM_TZDATA" != "yes"; then
669 + AC_DEFINE_UNQUOTED(HAVE_SYSTEM_TZDATA_PREFIX, "$PHP_SYSTEM_TZDATA",
670 + [Define for location of system timezone data])
671 + fi
672 +fi
673 +