package/libs/zlib: Add ARM and NEON optimizations
[openwrt/openwrt.git] / package / libs / zlib / patches / 002-arm-specific-optimisations-for-inflate.patch
1 From 6bac7a3e0ebcd3147294b73acb34606eba18ae7f Mon Sep 17 00:00:00 2001
2 From: Simon Hosie <simon.hosie@arm.com>
3 Date: Wed, 12 Apr 2017 12:52:33 -0700
4 Subject: [PATCH 1/2] Prepare ARM-specific contrib directory.
5
6 Change-Id: Id4cda552b39bfb39ab35ec499dbe122b43b6d1a1
7 ---
8 contrib/arm/inffast.c | 323 ++++++++++
9 contrib/arm/inflate.c | 1561 +++++++++++++++++++++++++++++++++++++++++++++++++
10 2 files changed, 1884 insertions(+)
11 create mode 100644 contrib/arm/inffast.c
12 create mode 100644 contrib/arm/inflate.c
13
14 diff --git a/contrib/arm/inffast.c b/contrib/arm/inffast.c
15 new file mode 100644
16 index 00000000..0dbd1dbc
17 --- /dev/null
18 +++ b/contrib/arm/inffast.c
19 @@ -0,0 +1,323 @@
20 +/* inffast.c -- fast decoding
21 + * Copyright (C) 1995-2017 Mark Adler
22 + * For conditions of distribution and use, see copyright notice in zlib.h
23 + */
24 +
25 +#include "zutil.h"
26 +#include "inftrees.h"
27 +#include "inflate.h"
28 +#include "inffast.h"
29 +
30 +#ifdef ASMINF
31 +# pragma message("Assembler code may have bugs -- use at your own risk")
32 +#else
33 +
34 +/*
35 + Decode literal, length, and distance codes and write out the resulting
36 + literal and match bytes until either not enough input or output is
37 + available, an end-of-block is encountered, or a data error is encountered.
38 + When large enough input and output buffers are supplied to inflate(), for
39 + example, a 16K input buffer and a 64K output buffer, more than 95% of the
40 + inflate execution time is spent in this routine.
41 +
42 + Entry assumptions:
43 +
44 + state->mode == LEN
45 + strm->avail_in >= 6
46 + strm->avail_out >= 258
47 + start >= strm->avail_out
48 + state->bits < 8
49 +
50 + On return, state->mode is one of:
51 +
52 + LEN -- ran out of enough output space or enough available input
53 + TYPE -- reached end of block code, inflate() to interpret next block
54 + BAD -- error in block data
55 +
56 + Notes:
57 +
58 + - The maximum input bits used by a length/distance pair is 15 bits for the
59 + length code, 5 bits for the length extra, 15 bits for the distance code,
60 + and 13 bits for the distance extra. This totals 48 bits, or six bytes.
61 + Therefore if strm->avail_in >= 6, then there is enough input to avoid
62 + checking for available input while decoding.
63 +
64 + - The maximum bytes that a single length/distance pair can output is 258
65 + bytes, which is the maximum length that can be coded. inflate_fast()
66 + requires strm->avail_out >= 258 for each loop to avoid checking for
67 + output space.
68 + */
69 +void ZLIB_INTERNAL inflate_fast(strm, start)
70 +z_streamp strm;
71 +unsigned start; /* inflate()'s starting value for strm->avail_out */
72 +{
73 + struct inflate_state FAR *state;
74 + z_const unsigned char FAR *in; /* local strm->next_in */
75 + z_const unsigned char FAR *last; /* have enough input while in < last */
76 + unsigned char FAR *out; /* local strm->next_out */
77 + unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
78 + unsigned char FAR *end; /* while out < end, enough space available */
79 +#ifdef INFLATE_STRICT
80 + unsigned dmax; /* maximum distance from zlib header */
81 +#endif
82 + unsigned wsize; /* window size or zero if not using window */
83 + unsigned whave; /* valid bytes in the window */
84 + unsigned wnext; /* window write index */
85 + unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
86 + unsigned long hold; /* local strm->hold */
87 + unsigned bits; /* local strm->bits */
88 + code const FAR *lcode; /* local strm->lencode */
89 + code const FAR *dcode; /* local strm->distcode */
90 + unsigned lmask; /* mask for first level of length codes */
91 + unsigned dmask; /* mask for first level of distance codes */
92 + code here; /* retrieved table entry */
93 + unsigned op; /* code bits, operation, extra bits, or */
94 + /* window position, window bytes to copy */
95 + unsigned len; /* match length, unused bytes */
96 + unsigned dist; /* match distance */
97 + unsigned char FAR *from; /* where to copy match from */
98 +
99 + /* copy state to local variables */
100 + state = (struct inflate_state FAR *)strm->state;
101 + in = strm->next_in;
102 + last = in + (strm->avail_in - 5);
103 + out = strm->next_out;
104 + beg = out - (start - strm->avail_out);
105 + end = out + (strm->avail_out - 257);
106 +#ifdef INFLATE_STRICT
107 + dmax = state->dmax;
108 +#endif
109 + wsize = state->wsize;
110 + whave = state->whave;
111 + wnext = state->wnext;
112 + window = state->window;
113 + hold = state->hold;
114 + bits = state->bits;
115 + lcode = state->lencode;
116 + dcode = state->distcode;
117 + lmask = (1U << state->lenbits) - 1;
118 + dmask = (1U << state->distbits) - 1;
119 +
120 + /* decode literals and length/distances until end-of-block or not enough
121 + input data or output space */
122 + do {
123 + if (bits < 15) {
124 + hold += (unsigned long)(*in++) << bits;
125 + bits += 8;
126 + hold += (unsigned long)(*in++) << bits;
127 + bits += 8;
128 + }
129 + here = lcode[hold & lmask];
130 + dolen:
131 + op = (unsigned)(here.bits);
132 + hold >>= op;
133 + bits -= op;
134 + op = (unsigned)(here.op);
135 + if (op == 0) { /* literal */
136 + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
137 + "inflate: literal '%c'\n" :
138 + "inflate: literal 0x%02x\n", here.val));
139 + *out++ = (unsigned char)(here.val);
140 + }
141 + else if (op & 16) { /* length base */
142 + len = (unsigned)(here.val);
143 + op &= 15; /* number of extra bits */
144 + if (op) {
145 + if (bits < op) {
146 + hold += (unsigned long)(*in++) << bits;
147 + bits += 8;
148 + }
149 + len += (unsigned)hold & ((1U << op) - 1);
150 + hold >>= op;
151 + bits -= op;
152 + }
153 + Tracevv((stderr, "inflate: length %u\n", len));
154 + if (bits < 15) {
155 + hold += (unsigned long)(*in++) << bits;
156 + bits += 8;
157 + hold += (unsigned long)(*in++) << bits;
158 + bits += 8;
159 + }
160 + here = dcode[hold & dmask];
161 + dodist:
162 + op = (unsigned)(here.bits);
163 + hold >>= op;
164 + bits -= op;
165 + op = (unsigned)(here.op);
166 + if (op & 16) { /* distance base */
167 + dist = (unsigned)(here.val);
168 + op &= 15; /* number of extra bits */
169 + if (bits < op) {
170 + hold += (unsigned long)(*in++) << bits;
171 + bits += 8;
172 + if (bits < op) {
173 + hold += (unsigned long)(*in++) << bits;
174 + bits += 8;
175 + }
176 + }
177 + dist += (unsigned)hold & ((1U << op) - 1);
178 +#ifdef INFLATE_STRICT
179 + if (dist > dmax) {
180 + strm->msg = (char *)"invalid distance too far back";
181 + state->mode = BAD;
182 + break;
183 + }
184 +#endif
185 + hold >>= op;
186 + bits -= op;
187 + Tracevv((stderr, "inflate: distance %u\n", dist));
188 + op = (unsigned)(out - beg); /* max distance in output */
189 + if (dist > op) { /* see if copy from window */
190 + op = dist - op; /* distance back in window */
191 + if (op > whave) {
192 + if (state->sane) {
193 + strm->msg =
194 + (char *)"invalid distance too far back";
195 + state->mode = BAD;
196 + break;
197 + }
198 +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
199 + if (len <= op - whave) {
200 + do {
201 + *out++ = 0;
202 + } while (--len);
203 + continue;
204 + }
205 + len -= op - whave;
206 + do {
207 + *out++ = 0;
208 + } while (--op > whave);
209 + if (op == 0) {
210 + from = out - dist;
211 + do {
212 + *out++ = *from++;
213 + } while (--len);
214 + continue;
215 + }
216 +#endif
217 + }
218 + from = window;
219 + if (wnext == 0) { /* very common case */
220 + from += wsize - op;
221 + if (op < len) { /* some from window */
222 + len -= op;
223 + do {
224 + *out++ = *from++;
225 + } while (--op);
226 + from = out - dist; /* rest from output */
227 + }
228 + }
229 + else if (wnext < op) { /* wrap around window */
230 + from += wsize + wnext - op;
231 + op -= wnext;
232 + if (op < len) { /* some from end of window */
233 + len -= op;
234 + do {
235 + *out++ = *from++;
236 + } while (--op);
237 + from = window;
238 + if (wnext < len) { /* some from start of window */
239 + op = wnext;
240 + len -= op;
241 + do {
242 + *out++ = *from++;
243 + } while (--op);
244 + from = out - dist; /* rest from output */
245 + }
246 + }
247 + }
248 + else { /* contiguous in window */
249 + from += wnext - op;
250 + if (op < len) { /* some from window */
251 + len -= op;
252 + do {
253 + *out++ = *from++;
254 + } while (--op);
255 + from = out - dist; /* rest from output */
256 + }
257 + }
258 + while (len > 2) {
259 + *out++ = *from++;
260 + *out++ = *from++;
261 + *out++ = *from++;
262 + len -= 3;
263 + }
264 + if (len) {
265 + *out++ = *from++;
266 + if (len > 1)
267 + *out++ = *from++;
268 + }
269 + }
270 + else {
271 + from = out - dist; /* copy direct from output */
272 + do { /* minimum length is three */
273 + *out++ = *from++;
274 + *out++ = *from++;
275 + *out++ = *from++;
276 + len -= 3;
277 + } while (len > 2);
278 + if (len) {
279 + *out++ = *from++;
280 + if (len > 1)
281 + *out++ = *from++;
282 + }
283 + }
284 + }
285 + else if ((op & 64) == 0) { /* 2nd level distance code */
286 + here = dcode[here.val + (hold & ((1U << op) - 1))];
287 + goto dodist;
288 + }
289 + else {
290 + strm->msg = (char *)"invalid distance code";
291 + state->mode = BAD;
292 + break;
293 + }
294 + }
295 + else if ((op & 64) == 0) { /* 2nd level length code */
296 + here = lcode[here.val + (hold & ((1U << op) - 1))];
297 + goto dolen;
298 + }
299 + else if (op & 32) { /* end-of-block */
300 + Tracevv((stderr, "inflate: end of block\n"));
301 + state->mode = TYPE;
302 + break;
303 + }
304 + else {
305 + strm->msg = (char *)"invalid literal/length code";
306 + state->mode = BAD;
307 + break;
308 + }
309 + } while (in < last && out < end);
310 +
311 + /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
312 + len = bits >> 3;
313 + in -= len;
314 + bits -= len << 3;
315 + hold &= (1U << bits) - 1;
316 +
317 + /* update state and return */
318 + strm->next_in = in;
319 + strm->next_out = out;
320 + strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
321 + strm->avail_out = (unsigned)(out < end ?
322 + 257 + (end - out) : 257 - (out - end));
323 + state->hold = hold;
324 + state->bits = bits;
325 + return;
326 +}
327 +
328 +/*
329 + inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
330 + - Using bit fields for code structure
331 + - Different op definition to avoid & for extra bits (do & for table bits)
332 + - Three separate decoding do-loops for direct, window, and wnext == 0
333 + - Special case for distance > 1 copies to do overlapped load and store copy
334 + - Explicit branch predictions (based on measured branch probabilities)
335 + - Deferring match copy and interspersed it with decoding subsequent codes
336 + - Swapping literal/length else
337 + - Swapping window/direct else
338 + - Larger unrolled copy loops (three is about right)
339 + - Moving len -= 3 statement into middle of loop
340 + */
341 +
342 +#endif /* !ASMINF */
343 diff --git a/contrib/arm/inflate.c b/contrib/arm/inflate.c
344 new file mode 100644
345 index 00000000..ac333e8c
346 --- /dev/null
347 +++ b/contrib/arm/inflate.c
348 @@ -0,0 +1,1561 @@
349 +/* inflate.c -- zlib decompression
350 + * Copyright (C) 1995-2016 Mark Adler
351 + * For conditions of distribution and use, see copyright notice in zlib.h
352 + */
353 +
354 +/*
355 + * Change history:
356 + *
357 + * 1.2.beta0 24 Nov 2002
358 + * - First version -- complete rewrite of inflate to simplify code, avoid
359 + * creation of window when not needed, minimize use of window when it is
360 + * needed, make inffast.c even faster, implement gzip decoding, and to
361 + * improve code readability and style over the previous zlib inflate code
362 + *
363 + * 1.2.beta1 25 Nov 2002
364 + * - Use pointers for available input and output checking in inffast.c
365 + * - Remove input and output counters in inffast.c
366 + * - Change inffast.c entry and loop from avail_in >= 7 to >= 6
367 + * - Remove unnecessary second byte pull from length extra in inffast.c
368 + * - Unroll direct copy to three copies per loop in inffast.c
369 + *
370 + * 1.2.beta2 4 Dec 2002
371 + * - Change external routine names to reduce potential conflicts
372 + * - Correct filename to inffixed.h for fixed tables in inflate.c
373 + * - Make hbuf[] unsigned char to match parameter type in inflate.c
374 + * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset)
375 + * to avoid negation problem on Alphas (64 bit) in inflate.c
376 + *
377 + * 1.2.beta3 22 Dec 2002
378 + * - Add comments on state->bits assertion in inffast.c
379 + * - Add comments on op field in inftrees.h
380 + * - Fix bug in reuse of allocated window after inflateReset()
381 + * - Remove bit fields--back to byte structure for speed
382 + * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths
383 + * - Change post-increments to pre-increments in inflate_fast(), PPC biased?
384 + * - Add compile time option, POSTINC, to use post-increments instead (Intel?)
385 + * - Make MATCH copy in inflate() much faster for when inflate_fast() not used
386 + * - Use local copies of stream next and avail values, as well as local bit
387 + * buffer and bit count in inflate()--for speed when inflate_fast() not used
388 + *
389 + * 1.2.beta4 1 Jan 2003
390 + * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings
391 + * - Move a comment on output buffer sizes from inffast.c to inflate.c
392 + * - Add comments in inffast.c to introduce the inflate_fast() routine
393 + * - Rearrange window copies in inflate_fast() for speed and simplification
394 + * - Unroll last copy for window match in inflate_fast()
395 + * - Use local copies of window variables in inflate_fast() for speed
396 + * - Pull out common wnext == 0 case for speed in inflate_fast()
397 + * - Make op and len in inflate_fast() unsigned for consistency
398 + * - Add FAR to lcode and dcode declarations in inflate_fast()
399 + * - Simplified bad distance check in inflate_fast()
400 + * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new
401 + * source file infback.c to provide a call-back interface to inflate for
402 + * programs like gzip and unzip -- uses window as output buffer to avoid
403 + * window copying
404 + *
405 + * 1.2.beta5 1 Jan 2003
406 + * - Improved inflateBack() interface to allow the caller to provide initial
407 + * input in strm.
408 + * - Fixed stored blocks bug in inflateBack()
409 + *
410 + * 1.2.beta6 4 Jan 2003
411 + * - Added comments in inffast.c on effectiveness of POSTINC
412 + * - Typecasting all around to reduce compiler warnings
413 + * - Changed loops from while (1) or do {} while (1) to for (;;), again to
414 + * make compilers happy
415 + * - Changed type of window in inflateBackInit() to unsigned char *
416 + *
417 + * 1.2.beta7 27 Jan 2003
418 + * - Changed many types to unsigned or unsigned short to avoid warnings
419 + * - Added inflateCopy() function
420 + *
421 + * 1.2.0 9 Mar 2003
422 + * - Changed inflateBack() interface to provide separate opaque descriptors
423 + * for the in() and out() functions
424 + * - Changed inflateBack() argument and in_func typedef to swap the length
425 + * and buffer address return values for the input function
426 + * - Check next_in and next_out for Z_NULL on entry to inflate()
427 + *
428 + * The history for versions after 1.2.0 are in ChangeLog in zlib distribution.
429 + */
430 +
431 +#include "zutil.h"
432 +#include "inftrees.h"
433 +#include "inflate.h"
434 +#include "inffast.h"
435 +
436 +#ifdef MAKEFIXED
437 +# ifndef BUILDFIXED
438 +# define BUILDFIXED
439 +# endif
440 +#endif
441 +
442 +/* function prototypes */
443 +local int inflateStateCheck OF((z_streamp strm));
444 +local void fixedtables OF((struct inflate_state FAR *state));
445 +local int updatewindow OF((z_streamp strm, const unsigned char FAR *end,
446 + unsigned copy));
447 +#ifdef BUILDFIXED
448 + void makefixed OF((void));
449 +#endif
450 +local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf,
451 + unsigned len));
452 +
453 +local int inflateStateCheck(strm)
454 +z_streamp strm;
455 +{
456 + struct inflate_state FAR *state;
457 + if (strm == Z_NULL ||
458 + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
459 + return 1;
460 + state = (struct inflate_state FAR *)strm->state;
461 + if (state == Z_NULL || state->strm != strm ||
462 + state->mode < HEAD || state->mode > SYNC)
463 + return 1;
464 + return 0;
465 +}
466 +
467 +int ZEXPORT inflateResetKeep(strm)
468 +z_streamp strm;
469 +{
470 + struct inflate_state FAR *state;
471 +
472 + if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
473 + state = (struct inflate_state FAR *)strm->state;
474 + strm->total_in = strm->total_out = state->total = 0;
475 + strm->msg = Z_NULL;
476 + if (state->wrap) /* to support ill-conceived Java test suite */
477 + strm->adler = state->wrap & 1;
478 + state->mode = HEAD;
479 + state->last = 0;
480 + state->havedict = 0;
481 + state->dmax = 32768U;
482 + state->head = Z_NULL;
483 + state->hold = 0;
484 + state->bits = 0;
485 + state->lencode = state->distcode = state->next = state->codes;
486 + state->sane = 1;
487 + state->back = -1;
488 + Tracev((stderr, "inflate: reset\n"));
489 + return Z_OK;
490 +}
491 +
492 +int ZEXPORT inflateReset(strm)
493 +z_streamp strm;
494 +{
495 + struct inflate_state FAR *state;
496 +
497 + if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
498 + state = (struct inflate_state FAR *)strm->state;
499 + state->wsize = 0;
500 + state->whave = 0;
501 + state->wnext = 0;
502 + return inflateResetKeep(strm);
503 +}
504 +
505 +int ZEXPORT inflateReset2(strm, windowBits)
506 +z_streamp strm;
507 +int windowBits;
508 +{
509 + int wrap;
510 + struct inflate_state FAR *state;
511 +
512 + /* get the state */
513 + if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
514 + state = (struct inflate_state FAR *)strm->state;
515 +
516 + /* extract wrap request from windowBits parameter */
517 + if (windowBits < 0) {
518 + wrap = 0;
519 + windowBits = -windowBits;
520 + }
521 + else {
522 + wrap = (windowBits >> 4) + 5;
523 +#ifdef GUNZIP
524 + if (windowBits < 48)
525 + windowBits &= 15;
526 +#endif
527 + }
528 +
529 + /* set number of window bits, free window if different */
530 + if (windowBits && (windowBits < 8 || windowBits > 15))
531 + return Z_STREAM_ERROR;
532 + if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) {
533 + ZFREE(strm, state->window);
534 + state->window = Z_NULL;
535 + }
536 +
537 + /* update state and reset the rest of it */
538 + state->wrap = wrap;
539 + state->wbits = (unsigned)windowBits;
540 + return inflateReset(strm);
541 +}
542 +
543 +int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size)
544 +z_streamp strm;
545 +int windowBits;
546 +const char *version;
547 +int stream_size;
548 +{
549 + int ret;
550 + struct inflate_state FAR *state;
551 +
552 + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
553 + stream_size != (int)(sizeof(z_stream)))
554 + return Z_VERSION_ERROR;
555 + if (strm == Z_NULL) return Z_STREAM_ERROR;
556 + strm->msg = Z_NULL; /* in case we return an error */
557 + if (strm->zalloc == (alloc_func)0) {
558 +#ifdef Z_SOLO
559 + return Z_STREAM_ERROR;
560 +#else
561 + strm->zalloc = zcalloc;
562 + strm->opaque = (voidpf)0;
563 +#endif
564 + }
565 + if (strm->zfree == (free_func)0)
566 +#ifdef Z_SOLO
567 + return Z_STREAM_ERROR;
568 +#else
569 + strm->zfree = zcfree;
570 +#endif
571 + state = (struct inflate_state FAR *)
572 + ZALLOC(strm, 1, sizeof(struct inflate_state));
573 + if (state == Z_NULL) return Z_MEM_ERROR;
574 + Tracev((stderr, "inflate: allocated\n"));
575 + strm->state = (struct internal_state FAR *)state;
576 + state->strm = strm;
577 + state->window = Z_NULL;
578 + state->mode = HEAD; /* to pass state test in inflateReset2() */
579 + ret = inflateReset2(strm, windowBits);
580 + if (ret != Z_OK) {
581 + ZFREE(strm, state);
582 + strm->state = Z_NULL;
583 + }
584 + return ret;
585 +}
586 +
587 +int ZEXPORT inflateInit_(strm, version, stream_size)
588 +z_streamp strm;
589 +const char *version;
590 +int stream_size;
591 +{
592 + return inflateInit2_(strm, DEF_WBITS, version, stream_size);
593 +}
594 +
595 +int ZEXPORT inflatePrime(strm, bits, value)
596 +z_streamp strm;
597 +int bits;
598 +int value;
599 +{
600 + struct inflate_state FAR *state;
601 +
602 + if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
603 + state = (struct inflate_state FAR *)strm->state;
604 + if (bits < 0) {
605 + state->hold = 0;
606 + state->bits = 0;
607 + return Z_OK;
608 + }
609 + if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR;
610 + value &= (1L << bits) - 1;
611 + state->hold += (unsigned)value << state->bits;
612 + state->bits += (uInt)bits;
613 + return Z_OK;
614 +}
615 +
616 +/*
617 + Return state with length and distance decoding tables and index sizes set to
618 + fixed code decoding. Normally this returns fixed tables from inffixed.h.
619 + If BUILDFIXED is defined, then instead this routine builds the tables the
620 + first time it's called, and returns those tables the first time and
621 + thereafter. This reduces the size of the code by about 2K bytes, in
622 + exchange for a little execution time. However, BUILDFIXED should not be
623 + used for threaded applications, since the rewriting of the tables and virgin
624 + may not be thread-safe.
625 + */
626 +local void fixedtables(state)
627 +struct inflate_state FAR *state;
628 +{
629 +#ifdef BUILDFIXED
630 + static int virgin = 1;
631 + static code *lenfix, *distfix;
632 + static code fixed[544];
633 +
634 + /* build fixed huffman tables if first call (may not be thread safe) */
635 + if (virgin) {
636 + unsigned sym, bits;
637 + static code *next;
638 +
639 + /* literal/length table */
640 + sym = 0;
641 + while (sym < 144) state->lens[sym++] = 8;
642 + while (sym < 256) state->lens[sym++] = 9;
643 + while (sym < 280) state->lens[sym++] = 7;
644 + while (sym < 288) state->lens[sym++] = 8;
645 + next = fixed;
646 + lenfix = next;
647 + bits = 9;
648 + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
649 +
650 + /* distance table */
651 + sym = 0;
652 + while (sym < 32) state->lens[sym++] = 5;
653 + distfix = next;
654 + bits = 5;
655 + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
656 +
657 + /* do this just once */
658 + virgin = 0;
659 + }
660 +#else /* !BUILDFIXED */
661 +# include "inffixed.h"
662 +#endif /* BUILDFIXED */
663 + state->lencode = lenfix;
664 + state->lenbits = 9;
665 + state->distcode = distfix;
666 + state->distbits = 5;
667 +}
668 +
669 +#ifdef MAKEFIXED
670 +#include <stdio.h>
671 +
672 +/*
673 + Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also
674 + defines BUILDFIXED, so the tables are built on the fly. makefixed() writes
675 + those tables to stdout, which would be piped to inffixed.h. A small program
676 + can simply call makefixed to do this:
677 +
678 + void makefixed(void);
679 +
680 + int main(void)
681 + {
682 + makefixed();
683 + return 0;
684 + }
685 +
686 + Then that can be linked with zlib built with MAKEFIXED defined and run:
687 +
688 + a.out > inffixed.h
689 + */
690 +void makefixed()
691 +{
692 + unsigned low, size;
693 + struct inflate_state state;
694 +
695 + fixedtables(&state);
696 + puts(" /* inffixed.h -- table for decoding fixed codes");
697 + puts(" * Generated automatically by makefixed().");
698 + puts(" */");
699 + puts("");
700 + puts(" /* WARNING: this file should *not* be used by applications.");
701 + puts(" It is part of the implementation of this library and is");
702 + puts(" subject to change. Applications should only use zlib.h.");
703 + puts(" */");
704 + puts("");
705 + size = 1U << 9;
706 + printf(" static const code lenfix[%u] = {", size);
707 + low = 0;
708 + for (;;) {
709 + if ((low % 7) == 0) printf("\n ");
710 + printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op,
711 + state.lencode[low].bits, state.lencode[low].val);
712 + if (++low == size) break;
713 + putchar(',');
714 + }
715 + puts("\n };");
716 + size = 1U << 5;
717 + printf("\n static const code distfix[%u] = {", size);
718 + low = 0;
719 + for (;;) {
720 + if ((low % 6) == 0) printf("\n ");
721 + printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits,
722 + state.distcode[low].val);
723 + if (++low == size) break;
724 + putchar(',');
725 + }
726 + puts("\n };");
727 +}
728 +#endif /* MAKEFIXED */
729 +
730 +/*
731 + Update the window with the last wsize (normally 32K) bytes written before
732 + returning. If window does not exist yet, create it. This is only called
733 + when a window is already in use, or when output has been written during this
734 + inflate call, but the end of the deflate stream has not been reached yet.
735 + It is also called to create a window for dictionary data when a dictionary
736 + is loaded.
737 +
738 + Providing output buffers larger than 32K to inflate() should provide a speed
739 + advantage, since only the last 32K of output is copied to the sliding window
740 + upon return from inflate(), and since all distances after the first 32K of
741 + output will fall in the output data, making match copies simpler and faster.
742 + The advantage may be dependent on the size of the processor's data caches.
743 + */
744 +local int updatewindow(strm, end, copy)
745 +z_streamp strm;
746 +const Bytef *end;
747 +unsigned copy;
748 +{
749 + struct inflate_state FAR *state;
750 + unsigned dist;
751 +
752 + state = (struct inflate_state FAR *)strm->state;
753 +
754 + /* if it hasn't been done already, allocate space for the window */
755 + if (state->window == Z_NULL) {
756 + state->window = (unsigned char FAR *)
757 + ZALLOC(strm, 1U << state->wbits,
758 + sizeof(unsigned char));
759 + if (state->window == Z_NULL) return 1;
760 + }
761 +
762 + /* if window not in use yet, initialize */
763 + if (state->wsize == 0) {
764 + state->wsize = 1U << state->wbits;
765 + state->wnext = 0;
766 + state->whave = 0;
767 + }
768 +
769 + /* copy state->wsize or less output bytes into the circular window */
770 + if (copy >= state->wsize) {
771 + zmemcpy(state->window, end - state->wsize, state->wsize);
772 + state->wnext = 0;
773 + state->whave = state->wsize;
774 + }
775 + else {
776 + dist = state->wsize - state->wnext;
777 + if (dist > copy) dist = copy;
778 + zmemcpy(state->window + state->wnext, end - copy, dist);
779 + copy -= dist;
780 + if (copy) {
781 + zmemcpy(state->window, end - copy, copy);
782 + state->wnext = copy;
783 + state->whave = state->wsize;
784 + }
785 + else {
786 + state->wnext += dist;
787 + if (state->wnext == state->wsize) state->wnext = 0;
788 + if (state->whave < state->wsize) state->whave += dist;
789 + }
790 + }
791 + return 0;
792 +}
793 +
794 +/* Macros for inflate(): */
795 +
796 +/* check function to use adler32() for zlib or crc32() for gzip */
797 +#ifdef GUNZIP
798 +# define UPDATE(check, buf, len) \
799 + (state->flags ? crc32(check, buf, len) : adler32(check, buf, len))
800 +#else
801 +# define UPDATE(check, buf, len) adler32(check, buf, len)
802 +#endif
803 +
804 +/* check macros for header crc */
805 +#ifdef GUNZIP
806 +# define CRC2(check, word) \
807 + do { \
808 + hbuf[0] = (unsigned char)(word); \
809 + hbuf[1] = (unsigned char)((word) >> 8); \
810 + check = crc32(check, hbuf, 2); \
811 + } while (0)
812 +
813 +# define CRC4(check, word) \
814 + do { \
815 + hbuf[0] = (unsigned char)(word); \
816 + hbuf[1] = (unsigned char)((word) >> 8); \
817 + hbuf[2] = (unsigned char)((word) >> 16); \
818 + hbuf[3] = (unsigned char)((word) >> 24); \
819 + check = crc32(check, hbuf, 4); \
820 + } while (0)
821 +#endif
822 +
823 +/* Load registers with state in inflate() for speed */
824 +#define LOAD() \
825 + do { \
826 + put = strm->next_out; \
827 + left = strm->avail_out; \
828 + next = strm->next_in; \
829 + have = strm->avail_in; \
830 + hold = state->hold; \
831 + bits = state->bits; \
832 + } while (0)
833 +
834 +/* Restore state from registers in inflate() */
835 +#define RESTORE() \
836 + do { \
837 + strm->next_out = put; \
838 + strm->avail_out = left; \
839 + strm->next_in = next; \
840 + strm->avail_in = have; \
841 + state->hold = hold; \
842 + state->bits = bits; \
843 + } while (0)
844 +
845 +/* Clear the input bit accumulator */
846 +#define INITBITS() \
847 + do { \
848 + hold = 0; \
849 + bits = 0; \
850 + } while (0)
851 +
852 +/* Get a byte of input into the bit accumulator, or return from inflate()
853 + if there is no input available. */
854 +#define PULLBYTE() \
855 + do { \
856 + if (have == 0) goto inf_leave; \
857 + have--; \
858 + hold += (unsigned long)(*next++) << bits; \
859 + bits += 8; \
860 + } while (0)
861 +
862 +/* Assure that there are at least n bits in the bit accumulator. If there is
863 + not enough available input to do that, then return from inflate(). */
864 +#define NEEDBITS(n) \
865 + do { \
866 + while (bits < (unsigned)(n)) \
867 + PULLBYTE(); \
868 + } while (0)
869 +
870 +/* Return the low n bits of the bit accumulator (n < 16) */
871 +#define BITS(n) \
872 + ((unsigned)hold & ((1U << (n)) - 1))
873 +
874 +/* Remove n bits from the bit accumulator */
875 +#define DROPBITS(n) \
876 + do { \
877 + hold >>= (n); \
878 + bits -= (unsigned)(n); \
879 + } while (0)
880 +
881 +/* Remove zero to seven bits as needed to go to a byte boundary */
882 +#define BYTEBITS() \
883 + do { \
884 + hold >>= bits & 7; \
885 + bits -= bits & 7; \
886 + } while (0)
887 +
888 +/*
889 + inflate() uses a state machine to process as much input data and generate as
890 + much output data as possible before returning. The state machine is
891 + structured roughly as follows:
892 +
893 + for (;;) switch (state) {
894 + ...
895 + case STATEn:
896 + if (not enough input data or output space to make progress)
897 + return;
898 + ... make progress ...
899 + state = STATEm;
900 + break;
901 + ...
902 + }
903 +
904 + so when inflate() is called again, the same case is attempted again, and
905 + if the appropriate resources are provided, the machine proceeds to the
906 + next state. The NEEDBITS() macro is usually the way the state evaluates
907 + whether it can proceed or should return. NEEDBITS() does the return if
908 + the requested bits are not available. The typical use of the BITS macros
909 + is:
910 +
911 + NEEDBITS(n);
912 + ... do something with BITS(n) ...
913 + DROPBITS(n);
914 +
915 + where NEEDBITS(n) either returns from inflate() if there isn't enough
916 + input left to load n bits into the accumulator, or it continues. BITS(n)
917 + gives the low n bits in the accumulator. When done, DROPBITS(n) drops
918 + the low n bits off the accumulator. INITBITS() clears the accumulator
919 + and sets the number of available bits to zero. BYTEBITS() discards just
920 + enough bits to put the accumulator on a byte boundary. After BYTEBITS()
921 + and a NEEDBITS(8), then BITS(8) would return the next byte in the stream.
922 +
923 + NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return
924 + if there is no input available. The decoding of variable length codes uses
925 + PULLBYTE() directly in order to pull just enough bytes to decode the next
926 + code, and no more.
927 +
928 + Some states loop until they get enough input, making sure that enough
929 + state information is maintained to continue the loop where it left off
930 + if NEEDBITS() returns in the loop. For example, want, need, and keep
931 + would all have to actually be part of the saved state in case NEEDBITS()
932 + returns:
933 +
934 + case STATEw:
935 + while (want < need) {
936 + NEEDBITS(n);
937 + keep[want++] = BITS(n);
938 + DROPBITS(n);
939 + }
940 + state = STATEx;
941 + case STATEx:
942 +
943 + As shown above, if the next state is also the next case, then the break
944 + is omitted.
945 +
946 + A state may also return if there is not enough output space available to
947 + complete that state. Those states are copying stored data, writing a
948 + literal byte, and copying a matching string.
949 +
950 + When returning, a "goto inf_leave" is used to update the total counters,
951 + update the check value, and determine whether any progress has been made
952 + during that inflate() call in order to return the proper return code.
953 + Progress is defined as a change in either strm->avail_in or strm->avail_out.
954 + When there is a window, goto inf_leave will update the window with the last
955 + output written. If a goto inf_leave occurs in the middle of decompression
956 + and there is no window currently, goto inf_leave will create one and copy
957 + output to the window for the next call of inflate().
958 +
959 + In this implementation, the flush parameter of inflate() only affects the
960 + return code (per zlib.h). inflate() always writes as much as possible to
961 + strm->next_out, given the space available and the provided input--the effect
962 + documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers
963 + the allocation of and copying into a sliding window until necessary, which
964 + provides the effect documented in zlib.h for Z_FINISH when the entire input
965 + stream available. So the only thing the flush parameter actually does is:
966 + when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it
967 + will return Z_BUF_ERROR if it has not reached the end of the stream.
968 + */
969 +
970 +int ZEXPORT inflate(strm, flush)
971 +z_streamp strm;
972 +int flush;
973 +{
974 + struct inflate_state FAR *state;
975 + z_const unsigned char FAR *next; /* next input */
976 + unsigned char FAR *put; /* next output */
977 + unsigned have, left; /* available input and output */
978 + unsigned long hold; /* bit buffer */
979 + unsigned bits; /* bits in bit buffer */
980 + unsigned in, out; /* save starting available input and output */
981 + unsigned copy; /* number of stored or match bytes to copy */
982 + unsigned char FAR *from; /* where to copy match bytes from */
983 + code here; /* current decoding table entry */
984 + code last; /* parent table entry */
985 + unsigned len; /* length to copy for repeats, bits to drop */
986 + int ret; /* return code */
987 +#ifdef GUNZIP
988 + unsigned char hbuf[4]; /* buffer for gzip header crc calculation */
989 +#endif
990 + static const unsigned short order[19] = /* permutation of code lengths */
991 + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
992 +
993 + if (inflateStateCheck(strm) || strm->next_out == Z_NULL ||
994 + (strm->next_in == Z_NULL && strm->avail_in != 0))
995 + return Z_STREAM_ERROR;
996 +
997 + state = (struct inflate_state FAR *)strm->state;
998 + if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */
999 + LOAD();
1000 + in = have;
1001 + out = left;
1002 + ret = Z_OK;
1003 + for (;;)
1004 + switch (state->mode) {
1005 + case HEAD:
1006 + if (state->wrap == 0) {
1007 + state->mode = TYPEDO;
1008 + break;
1009 + }
1010 + NEEDBITS(16);
1011 +#ifdef GUNZIP
1012 + if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */
1013 + if (state->wbits == 0)
1014 + state->wbits = 15;
1015 + state->check = crc32(0L, Z_NULL, 0);
1016 + CRC2(state->check, hold);
1017 + INITBITS();
1018 + state->mode = FLAGS;
1019 + break;
1020 + }
1021 + state->flags = 0; /* expect zlib header */
1022 + if (state->head != Z_NULL)
1023 + state->head->done = -1;
1024 + if (!(state->wrap & 1) || /* check if zlib header allowed */
1025 +#else
1026 + if (
1027 +#endif
1028 + ((BITS(8) << 8) + (hold >> 8)) % 31) {
1029 + strm->msg = (char *)"incorrect header check";
1030 + state->mode = BAD;
1031 + break;
1032 + }
1033 + if (BITS(4) != Z_DEFLATED) {
1034 + strm->msg = (char *)"unknown compression method";
1035 + state->mode = BAD;
1036 + break;
1037 + }
1038 + DROPBITS(4);
1039 + len = BITS(4) + 8;
1040 + if (state->wbits == 0)
1041 + state->wbits = len;
1042 + if (len > 15 || len > state->wbits) {
1043 + strm->msg = (char *)"invalid window size";
1044 + state->mode = BAD;
1045 + break;
1046 + }
1047 + state->dmax = 1U << len;
1048 + Tracev((stderr, "inflate: zlib header ok\n"));
1049 + strm->adler = state->check = adler32(0L, Z_NULL, 0);
1050 + state->mode = hold & 0x200 ? DICTID : TYPE;
1051 + INITBITS();
1052 + break;
1053 +#ifdef GUNZIP
1054 + case FLAGS:
1055 + NEEDBITS(16);
1056 + state->flags = (int)(hold);
1057 + if ((state->flags & 0xff) != Z_DEFLATED) {
1058 + strm->msg = (char *)"unknown compression method";
1059 + state->mode = BAD;
1060 + break;
1061 + }
1062 + if (state->flags & 0xe000) {
1063 + strm->msg = (char *)"unknown header flags set";
1064 + state->mode = BAD;
1065 + break;
1066 + }
1067 + if (state->head != Z_NULL)
1068 + state->head->text = (int)((hold >> 8) & 1);
1069 + if ((state->flags & 0x0200) && (state->wrap & 4))
1070 + CRC2(state->check, hold);
1071 + INITBITS();
1072 + state->mode = TIME;
1073 + case TIME:
1074 + NEEDBITS(32);
1075 + if (state->head != Z_NULL)
1076 + state->head->time = hold;
1077 + if ((state->flags & 0x0200) && (state->wrap & 4))
1078 + CRC4(state->check, hold);
1079 + INITBITS();
1080 + state->mode = OS;
1081 + case OS:
1082 + NEEDBITS(16);
1083 + if (state->head != Z_NULL) {
1084 + state->head->xflags = (int)(hold & 0xff);
1085 + state->head->os = (int)(hold >> 8);
1086 + }
1087 + if ((state->flags & 0x0200) && (state->wrap & 4))
1088 + CRC2(state->check, hold);
1089 + INITBITS();
1090 + state->mode = EXLEN;
1091 + case EXLEN:
1092 + if (state->flags & 0x0400) {
1093 + NEEDBITS(16);
1094 + state->length = (unsigned)(hold);
1095 + if (state->head != Z_NULL)
1096 + state->head->extra_len = (unsigned)hold;
1097 + if ((state->flags & 0x0200) && (state->wrap & 4))
1098 + CRC2(state->check, hold);
1099 + INITBITS();
1100 + }
1101 + else if (state->head != Z_NULL)
1102 + state->head->extra = Z_NULL;
1103 + state->mode = EXTRA;
1104 + case EXTRA:
1105 + if (state->flags & 0x0400) {
1106 + copy = state->length;
1107 + if (copy > have) copy = have;
1108 + if (copy) {
1109 + if (state->head != Z_NULL &&
1110 + state->head->extra != Z_NULL) {
1111 + len = state->head->extra_len - state->length;
1112 + zmemcpy(state->head->extra + len, next,
1113 + len + copy > state->head->extra_max ?
1114 + state->head->extra_max - len : copy);
1115 + }
1116 + if ((state->flags & 0x0200) && (state->wrap & 4))
1117 + state->check = crc32(state->check, next, copy);
1118 + have -= copy;
1119 + next += copy;
1120 + state->length -= copy;
1121 + }
1122 + if (state->length) goto inf_leave;
1123 + }
1124 + state->length = 0;
1125 + state->mode = NAME;
1126 + case NAME:
1127 + if (state->flags & 0x0800) {
1128 + if (have == 0) goto inf_leave;
1129 + copy = 0;
1130 + do {
1131 + len = (unsigned)(next[copy++]);
1132 + if (state->head != Z_NULL &&
1133 + state->head->name != Z_NULL &&
1134 + state->length < state->head->name_max)
1135 + state->head->name[state->length++] = (Bytef)len;
1136 + } while (len && copy < have);
1137 + if ((state->flags & 0x0200) && (state->wrap & 4))
1138 + state->check = crc32(state->check, next, copy);
1139 + have -= copy;
1140 + next += copy;
1141 + if (len) goto inf_leave;
1142 + }
1143 + else if (state->head != Z_NULL)
1144 + state->head->name = Z_NULL;
1145 + state->length = 0;
1146 + state->mode = COMMENT;
1147 + case COMMENT:
1148 + if (state->flags & 0x1000) {
1149 + if (have == 0) goto inf_leave;
1150 + copy = 0;
1151 + do {
1152 + len = (unsigned)(next[copy++]);
1153 + if (state->head != Z_NULL &&
1154 + state->head->comment != Z_NULL &&
1155 + state->length < state->head->comm_max)
1156 + state->head->comment[state->length++] = (Bytef)len;
1157 + } while (len && copy < have);
1158 + if ((state->flags & 0x0200) && (state->wrap & 4))
1159 + state->check = crc32(state->check, next, copy);
1160 + have -= copy;
1161 + next += copy;
1162 + if (len) goto inf_leave;
1163 + }
1164 + else if (state->head != Z_NULL)
1165 + state->head->comment = Z_NULL;
1166 + state->mode = HCRC;
1167 + case HCRC:
1168 + if (state->flags & 0x0200) {
1169 + NEEDBITS(16);
1170 + if ((state->wrap & 4) && hold != (state->check & 0xffff)) {
1171 + strm->msg = (char *)"header crc mismatch";
1172 + state->mode = BAD;
1173 + break;
1174 + }
1175 + INITBITS();
1176 + }
1177 + if (state->head != Z_NULL) {
1178 + state->head->hcrc = (int)((state->flags >> 9) & 1);
1179 + state->head->done = 1;
1180 + }
1181 + strm->adler = state->check = crc32(0L, Z_NULL, 0);
1182 + state->mode = TYPE;
1183 + break;
1184 +#endif
1185 + case DICTID:
1186 + NEEDBITS(32);
1187 + strm->adler = state->check = ZSWAP32(hold);
1188 + INITBITS();
1189 + state->mode = DICT;
1190 + case DICT:
1191 + if (state->havedict == 0) {
1192 + RESTORE();
1193 + return Z_NEED_DICT;
1194 + }
1195 + strm->adler = state->check = adler32(0L, Z_NULL, 0);
1196 + state->mode = TYPE;
1197 + case TYPE:
1198 + if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave;
1199 + case TYPEDO:
1200 + if (state->last) {
1201 + BYTEBITS();
1202 + state->mode = CHECK;
1203 + break;
1204 + }
1205 + NEEDBITS(3);
1206 + state->last = BITS(1);
1207 + DROPBITS(1);
1208 + switch (BITS(2)) {
1209 + case 0: /* stored block */
1210 + Tracev((stderr, "inflate: stored block%s\n",
1211 + state->last ? " (last)" : ""));
1212 + state->mode = STORED;
1213 + break;
1214 + case 1: /* fixed block */
1215 + fixedtables(state);
1216 + Tracev((stderr, "inflate: fixed codes block%s\n",
1217 + state->last ? " (last)" : ""));
1218 + state->mode = LEN_; /* decode codes */
1219 + if (flush == Z_TREES) {
1220 + DROPBITS(2);
1221 + goto inf_leave;
1222 + }
1223 + break;
1224 + case 2: /* dynamic block */
1225 + Tracev((stderr, "inflate: dynamic codes block%s\n",
1226 + state->last ? " (last)" : ""));
1227 + state->mode = TABLE;
1228 + break;
1229 + case 3:
1230 + strm->msg = (char *)"invalid block type";
1231 + state->mode = BAD;
1232 + }
1233 + DROPBITS(2);
1234 + break;
1235 + case STORED:
1236 + BYTEBITS(); /* go to byte boundary */
1237 + NEEDBITS(32);
1238 + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
1239 + strm->msg = (char *)"invalid stored block lengths";
1240 + state->mode = BAD;
1241 + break;
1242 + }
1243 + state->length = (unsigned)hold & 0xffff;
1244 + Tracev((stderr, "inflate: stored length %u\n",
1245 + state->length));
1246 + INITBITS();
1247 + state->mode = COPY_;
1248 + if (flush == Z_TREES) goto inf_leave;
1249 + case COPY_:
1250 + state->mode = COPY;
1251 + case COPY:
1252 + copy = state->length;
1253 + if (copy) {
1254 + if (copy > have) copy = have;
1255 + if (copy > left) copy = left;
1256 + if (copy == 0) goto inf_leave;
1257 + zmemcpy(put, next, copy);
1258 + have -= copy;
1259 + next += copy;
1260 + left -= copy;
1261 + put += copy;
1262 + state->length -= copy;
1263 + break;
1264 + }
1265 + Tracev((stderr, "inflate: stored end\n"));
1266 + state->mode = TYPE;
1267 + break;
1268 + case TABLE:
1269 + NEEDBITS(14);
1270 + state->nlen = BITS(5) + 257;
1271 + DROPBITS(5);
1272 + state->ndist = BITS(5) + 1;
1273 + DROPBITS(5);
1274 + state->ncode = BITS(4) + 4;
1275 + DROPBITS(4);
1276 +#ifndef PKZIP_BUG_WORKAROUND
1277 + if (state->nlen > 286 || state->ndist > 30) {
1278 + strm->msg = (char *)"too many length or distance symbols";
1279 + state->mode = BAD;
1280 + break;
1281 + }
1282 +#endif
1283 + Tracev((stderr, "inflate: table sizes ok\n"));
1284 + state->have = 0;
1285 + state->mode = LENLENS;
1286 + case LENLENS:
1287 + while (state->have < state->ncode) {
1288 + NEEDBITS(3);
1289 + state->lens[order[state->have++]] = (unsigned short)BITS(3);
1290 + DROPBITS(3);
1291 + }
1292 + while (state->have < 19)
1293 + state->lens[order[state->have++]] = 0;
1294 + state->next = state->codes;
1295 + state->lencode = (const code FAR *)(state->next);
1296 + state->lenbits = 7;
1297 + ret = inflate_table(CODES, state->lens, 19, &(state->next),
1298 + &(state->lenbits), state->work);
1299 + if (ret) {
1300 + strm->msg = (char *)"invalid code lengths set";
1301 + state->mode = BAD;
1302 + break;
1303 + }
1304 + Tracev((stderr, "inflate: code lengths ok\n"));
1305 + state->have = 0;
1306 + state->mode = CODELENS;
1307 + case CODELENS:
1308 + while (state->have < state->nlen + state->ndist) {
1309 + for (;;) {
1310 + here = state->lencode[BITS(state->lenbits)];
1311 + if ((unsigned)(here.bits) <= bits) break;
1312 + PULLBYTE();
1313 + }
1314 + if (here.val < 16) {
1315 + DROPBITS(here.bits);
1316 + state->lens[state->have++] = here.val;
1317 + }
1318 + else {
1319 + if (here.val == 16) {
1320 + NEEDBITS(here.bits + 2);
1321 + DROPBITS(here.bits);
1322 + if (state->have == 0) {
1323 + strm->msg = (char *)"invalid bit length repeat";
1324 + state->mode = BAD;
1325 + break;
1326 + }
1327 + len = state->lens[state->have - 1];
1328 + copy = 3 + BITS(2);
1329 + DROPBITS(2);
1330 + }
1331 + else if (here.val == 17) {
1332 + NEEDBITS(here.bits + 3);
1333 + DROPBITS(here.bits);
1334 + len = 0;
1335 + copy = 3 + BITS(3);
1336 + DROPBITS(3);
1337 + }
1338 + else {
1339 + NEEDBITS(here.bits + 7);
1340 + DROPBITS(here.bits);
1341 + len = 0;
1342 + copy = 11 + BITS(7);
1343 + DROPBITS(7);
1344 + }
1345 + if (state->have + copy > state->nlen + state->ndist) {
1346 + strm->msg = (char *)"invalid bit length repeat";
1347 + state->mode = BAD;
1348 + break;
1349 + }
1350 + while (copy--)
1351 + state->lens[state->have++] = (unsigned short)len;
1352 + }
1353 + }
1354 +
1355 + /* handle error breaks in while */
1356 + if (state->mode == BAD) break;
1357 +
1358 + /* check for end-of-block code (better have one) */
1359 + if (state->lens[256] == 0) {
1360 + strm->msg = (char *)"invalid code -- missing end-of-block";
1361 + state->mode = BAD;
1362 + break;
1363 + }
1364 +
1365 + /* build code tables -- note: do not change the lenbits or distbits
1366 + values here (9 and 6) without reading the comments in inftrees.h
1367 + concerning the ENOUGH constants, which depend on those values */
1368 + state->next = state->codes;
1369 + state->lencode = (const code FAR *)(state->next);
1370 + state->lenbits = 9;
1371 + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
1372 + &(state->lenbits), state->work);
1373 + if (ret) {
1374 + strm->msg = (char *)"invalid literal/lengths set";
1375 + state->mode = BAD;
1376 + break;
1377 + }
1378 + state->distcode = (const code FAR *)(state->next);
1379 + state->distbits = 6;
1380 + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
1381 + &(state->next), &(state->distbits), state->work);
1382 + if (ret) {
1383 + strm->msg = (char *)"invalid distances set";
1384 + state->mode = BAD;
1385 + break;
1386 + }
1387 + Tracev((stderr, "inflate: codes ok\n"));
1388 + state->mode = LEN_;
1389 + if (flush == Z_TREES) goto inf_leave;
1390 + case LEN_:
1391 + state->mode = LEN;
1392 + case LEN:
1393 + if (have >= 6 && left >= 258) {
1394 + RESTORE();
1395 + inflate_fast(strm, out);
1396 + LOAD();
1397 + if (state->mode == TYPE)
1398 + state->back = -1;
1399 + break;
1400 + }
1401 + state->back = 0;
1402 + for (;;) {
1403 + here = state->lencode[BITS(state->lenbits)];
1404 + if ((unsigned)(here.bits) <= bits) break;
1405 + PULLBYTE();
1406 + }
1407 + if (here.op && (here.op & 0xf0) == 0) {
1408 + last = here;
1409 + for (;;) {
1410 + here = state->lencode[last.val +
1411 + (BITS(last.bits + last.op) >> last.bits)];
1412 + if ((unsigned)(last.bits + here.bits) <= bits) break;
1413 + PULLBYTE();
1414 + }
1415 + DROPBITS(last.bits);
1416 + state->back += last.bits;
1417 + }
1418 + DROPBITS(here.bits);
1419 + state->back += here.bits;
1420 + state->length = (unsigned)here.val;
1421 + if ((int)(here.op) == 0) {
1422 + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
1423 + "inflate: literal '%c'\n" :
1424 + "inflate: literal 0x%02x\n", here.val));
1425 + state->mode = LIT;
1426 + break;
1427 + }
1428 + if (here.op & 32) {
1429 + Tracevv((stderr, "inflate: end of block\n"));
1430 + state->back = -1;
1431 + state->mode = TYPE;
1432 + break;
1433 + }
1434 + if (here.op & 64) {
1435 + strm->msg = (char *)"invalid literal/length code";
1436 + state->mode = BAD;
1437 + break;
1438 + }
1439 + state->extra = (unsigned)(here.op) & 15;
1440 + state->mode = LENEXT;
1441 + case LENEXT:
1442 + if (state->extra) {
1443 + NEEDBITS(state->extra);
1444 + state->length += BITS(state->extra);
1445 + DROPBITS(state->extra);
1446 + state->back += state->extra;
1447 + }
1448 + Tracevv((stderr, "inflate: length %u\n", state->length));
1449 + state->was = state->length;
1450 + state->mode = DIST;
1451 + case DIST:
1452 + for (;;) {
1453 + here = state->distcode[BITS(state->distbits)];
1454 + if ((unsigned)(here.bits) <= bits) break;
1455 + PULLBYTE();
1456 + }
1457 + if ((here.op & 0xf0) == 0) {
1458 + last = here;
1459 + for (;;) {
1460 + here = state->distcode[last.val +
1461 + (BITS(last.bits + last.op) >> last.bits)];
1462 + if ((unsigned)(last.bits + here.bits) <= bits) break;
1463 + PULLBYTE();
1464 + }
1465 + DROPBITS(last.bits);
1466 + state->back += last.bits;
1467 + }
1468 + DROPBITS(here.bits);
1469 + state->back += here.bits;
1470 + if (here.op & 64) {
1471 + strm->msg = (char *)"invalid distance code";
1472 + state->mode = BAD;
1473 + break;
1474 + }
1475 + state->offset = (unsigned)here.val;
1476 + state->extra = (unsigned)(here.op) & 15;
1477 + state->mode = DISTEXT;
1478 + case DISTEXT:
1479 + if (state->extra) {
1480 + NEEDBITS(state->extra);
1481 + state->offset += BITS(state->extra);
1482 + DROPBITS(state->extra);
1483 + state->back += state->extra;
1484 + }
1485 +#ifdef INFLATE_STRICT
1486 + if (state->offset > state->dmax) {
1487 + strm->msg = (char *)"invalid distance too far back";
1488 + state->mode = BAD;
1489 + break;
1490 + }
1491 +#endif
1492 + Tracevv((stderr, "inflate: distance %u\n", state->offset));
1493 + state->mode = MATCH;
1494 + case MATCH:
1495 + if (left == 0) goto inf_leave;
1496 + copy = out - left;
1497 + if (state->offset > copy) { /* copy from window */
1498 + copy = state->offset - copy;
1499 + if (copy > state->whave) {
1500 + if (state->sane) {
1501 + strm->msg = (char *)"invalid distance too far back";
1502 + state->mode = BAD;
1503 + break;
1504 + }
1505 +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
1506 + Trace((stderr, "inflate.c too far\n"));
1507 + copy -= state->whave;
1508 + if (copy > state->length) copy = state->length;
1509 + if (copy > left) copy = left;
1510 + left -= copy;
1511 + state->length -= copy;
1512 + do {
1513 + *put++ = 0;
1514 + } while (--copy);
1515 + if (state->length == 0) state->mode = LEN;
1516 + break;
1517 +#endif
1518 + }
1519 + if (copy > state->wnext) {
1520 + copy -= state->wnext;
1521 + from = state->window + (state->wsize - copy);
1522 + }
1523 + else
1524 + from = state->window + (state->wnext - copy);
1525 + if (copy > state->length) copy = state->length;
1526 + }
1527 + else { /* copy from output */
1528 + from = put - state->offset;
1529 + copy = state->length;
1530 + }
1531 + if (copy > left) copy = left;
1532 + left -= copy;
1533 + state->length -= copy;
1534 + do {
1535 + *put++ = *from++;
1536 + } while (--copy);
1537 + if (state->length == 0) state->mode = LEN;
1538 + break;
1539 + case LIT:
1540 + if (left == 0) goto inf_leave;
1541 + *put++ = (unsigned char)(state->length);
1542 + left--;
1543 + state->mode = LEN;
1544 + break;
1545 + case CHECK:
1546 + if (state->wrap) {
1547 + NEEDBITS(32);
1548 + out -= left;
1549 + strm->total_out += out;
1550 + state->total += out;
1551 + if ((state->wrap & 4) && out)
1552 + strm->adler = state->check =
1553 + UPDATE(state->check, put - out, out);
1554 + out = left;
1555 + if ((state->wrap & 4) && (
1556 +#ifdef GUNZIP
1557 + state->flags ? hold :
1558 +#endif
1559 + ZSWAP32(hold)) != state->check) {
1560 + strm->msg = (char *)"incorrect data check";
1561 + state->mode = BAD;
1562 + break;
1563 + }
1564 + INITBITS();
1565 + Tracev((stderr, "inflate: check matches trailer\n"));
1566 + }
1567 +#ifdef GUNZIP
1568 + state->mode = LENGTH;
1569 + case LENGTH:
1570 + if (state->wrap && state->flags) {
1571 + NEEDBITS(32);
1572 + if (hold != (state->total & 0xffffffffUL)) {
1573 + strm->msg = (char *)"incorrect length check";
1574 + state->mode = BAD;
1575 + break;
1576 + }
1577 + INITBITS();
1578 + Tracev((stderr, "inflate: length matches trailer\n"));
1579 + }
1580 +#endif
1581 + state->mode = DONE;
1582 + case DONE:
1583 + ret = Z_STREAM_END;
1584 + goto inf_leave;
1585 + case BAD:
1586 + ret = Z_DATA_ERROR;
1587 + goto inf_leave;
1588 + case MEM:
1589 + return Z_MEM_ERROR;
1590 + case SYNC:
1591 + default:
1592 + return Z_STREAM_ERROR;
1593 + }
1594 +
1595 + /*
1596 + Return from inflate(), updating the total counts and the check value.
1597 + If there was no progress during the inflate() call, return a buffer
1598 + error. Call updatewindow() to create and/or update the window state.
1599 + Note: a memory error from inflate() is non-recoverable.
1600 + */
1601 + inf_leave:
1602 + RESTORE();
1603 + if (state->wsize || (out != strm->avail_out && state->mode < BAD &&
1604 + (state->mode < CHECK || flush != Z_FINISH)))
1605 + if (updatewindow(strm, strm->next_out, out - strm->avail_out)) {
1606 + state->mode = MEM;
1607 + return Z_MEM_ERROR;
1608 + }
1609 + in -= strm->avail_in;
1610 + out -= strm->avail_out;
1611 + strm->total_in += in;
1612 + strm->total_out += out;
1613 + state->total += out;
1614 + if ((state->wrap & 4) && out)
1615 + strm->adler = state->check =
1616 + UPDATE(state->check, strm->next_out - out, out);
1617 + strm->data_type = (int)state->bits + (state->last ? 64 : 0) +
1618 + (state->mode == TYPE ? 128 : 0) +
1619 + (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0);
1620 + if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)
1621 + ret = Z_BUF_ERROR;
1622 + return ret;
1623 +}
1624 +
1625 +int ZEXPORT inflateEnd(strm)
1626 +z_streamp strm;
1627 +{
1628 + struct inflate_state FAR *state;
1629 + if (inflateStateCheck(strm))
1630 + return Z_STREAM_ERROR;
1631 + state = (struct inflate_state FAR *)strm->state;
1632 + if (state->window != Z_NULL) ZFREE(strm, state->window);
1633 + ZFREE(strm, strm->state);
1634 + strm->state = Z_NULL;
1635 + Tracev((stderr, "inflate: end\n"));
1636 + return Z_OK;
1637 +}
1638 +
1639 +int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength)
1640 +z_streamp strm;
1641 +Bytef *dictionary;
1642 +uInt *dictLength;
1643 +{
1644 + struct inflate_state FAR *state;
1645 +
1646 + /* check state */
1647 + if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
1648 + state = (struct inflate_state FAR *)strm->state;
1649 +
1650 + /* copy dictionary */
1651 + if (state->whave && dictionary != Z_NULL) {
1652 + zmemcpy(dictionary, state->window + state->wnext,
1653 + state->whave - state->wnext);
1654 + zmemcpy(dictionary + state->whave - state->wnext,
1655 + state->window, state->wnext);
1656 + }
1657 + if (dictLength != Z_NULL)
1658 + *dictLength = state->whave;
1659 + return Z_OK;
1660 +}
1661 +
1662 +int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength)
1663 +z_streamp strm;
1664 +const Bytef *dictionary;
1665 +uInt dictLength;
1666 +{
1667 + struct inflate_state FAR *state;
1668 + unsigned long dictid;
1669 + int ret;
1670 +
1671 + /* check state */
1672 + if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
1673 + state = (struct inflate_state FAR *)strm->state;
1674 + if (state->wrap != 0 && state->mode != DICT)
1675 + return Z_STREAM_ERROR;
1676 +
1677 + /* check for correct dictionary identifier */
1678 + if (state->mode == DICT) {
1679 + dictid = adler32(0L, Z_NULL, 0);
1680 + dictid = adler32(dictid, dictionary, dictLength);
1681 + if (dictid != state->check)
1682 + return Z_DATA_ERROR;
1683 + }
1684 +
1685 + /* copy dictionary to window using updatewindow(), which will amend the
1686 + existing dictionary if appropriate */
1687 + ret = updatewindow(strm, dictionary + dictLength, dictLength);
1688 + if (ret) {
1689 + state->mode = MEM;
1690 + return Z_MEM_ERROR;
1691 + }
1692 + state->havedict = 1;
1693 + Tracev((stderr, "inflate: dictionary set\n"));
1694 + return Z_OK;
1695 +}
1696 +
1697 +int ZEXPORT inflateGetHeader(strm, head)
1698 +z_streamp strm;
1699 +gz_headerp head;
1700 +{
1701 + struct inflate_state FAR *state;
1702 +
1703 + /* check state */
1704 + if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
1705 + state = (struct inflate_state FAR *)strm->state;
1706 + if ((state->wrap & 2) == 0) return Z_STREAM_ERROR;
1707 +
1708 + /* save header structure */
1709 + state->head = head;
1710 + head->done = 0;
1711 + return Z_OK;
1712 +}
1713 +
1714 +/*
1715 + Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found
1716 + or when out of input. When called, *have is the number of pattern bytes
1717 + found in order so far, in 0..3. On return *have is updated to the new
1718 + state. If on return *have equals four, then the pattern was found and the
1719 + return value is how many bytes were read including the last byte of the
1720 + pattern. If *have is less than four, then the pattern has not been found
1721 + yet and the return value is len. In the latter case, syncsearch() can be
1722 + called again with more data and the *have state. *have is initialized to
1723 + zero for the first call.
1724 + */
1725 +local unsigned syncsearch(have, buf, len)
1726 +unsigned FAR *have;
1727 +const unsigned char FAR *buf;
1728 +unsigned len;
1729 +{
1730 + unsigned got;
1731 + unsigned next;
1732 +
1733 + got = *have;
1734 + next = 0;
1735 + while (next < len && got < 4) {
1736 + if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
1737 + got++;
1738 + else if (buf[next])
1739 + got = 0;
1740 + else
1741 + got = 4 - got;
1742 + next++;
1743 + }
1744 + *have = got;
1745 + return next;
1746 +}
1747 +
1748 +int ZEXPORT inflateSync(strm)
1749 +z_streamp strm;
1750 +{
1751 + unsigned len; /* number of bytes to look at or looked at */
1752 + unsigned long in, out; /* temporary to save total_in and total_out */
1753 + unsigned char buf[4]; /* to restore bit buffer to byte string */
1754 + struct inflate_state FAR *state;
1755 +
1756 + /* check parameters */
1757 + if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
1758 + state = (struct inflate_state FAR *)strm->state;
1759 + if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
1760 +
1761 + /* if first time, start search in bit buffer */
1762 + if (state->mode != SYNC) {
1763 + state->mode = SYNC;
1764 + state->hold <<= state->bits & 7;
1765 + state->bits -= state->bits & 7;
1766 + len = 0;
1767 + while (state->bits >= 8) {
1768 + buf[len++] = (unsigned char)(state->hold);
1769 + state->hold >>= 8;
1770 + state->bits -= 8;
1771 + }
1772 + state->have = 0;
1773 + syncsearch(&(state->have), buf, len);
1774 + }
1775 +
1776 + /* search available input */
1777 + len = syncsearch(&(state->have), strm->next_in, strm->avail_in);
1778 + strm->avail_in -= len;
1779 + strm->next_in += len;
1780 + strm->total_in += len;
1781 +
1782 + /* return no joy or set up to restart inflate() on a new block */
1783 + if (state->have != 4) return Z_DATA_ERROR;
1784 + in = strm->total_in; out = strm->total_out;
1785 + inflateReset(strm);
1786 + strm->total_in = in; strm->total_out = out;
1787 + state->mode = TYPE;
1788 + return Z_OK;
1789 +}
1790 +
1791 +/*
1792 + Returns true if inflate is currently at the end of a block generated by
1793 + Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP
1794 + implementation to provide an additional safety check. PPP uses
1795 + Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored
1796 + block. When decompressing, PPP checks that at the end of input packet,
1797 + inflate is waiting for these length bytes.
1798 + */
1799 +int ZEXPORT inflateSyncPoint(strm)
1800 +z_streamp strm;
1801 +{
1802 + struct inflate_state FAR *state;
1803 +
1804 + if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
1805 + state = (struct inflate_state FAR *)strm->state;
1806 + return state->mode == STORED && state->bits == 0;
1807 +}
1808 +
1809 +int ZEXPORT inflateCopy(dest, source)
1810 +z_streamp dest;
1811 +z_streamp source;
1812 +{
1813 + struct inflate_state FAR *state;
1814 + struct inflate_state FAR *copy;
1815 + unsigned char FAR *window;
1816 + unsigned wsize;
1817 +
1818 + /* check input */
1819 + if (inflateStateCheck(source) || dest == Z_NULL)
1820 + return Z_STREAM_ERROR;
1821 + state = (struct inflate_state FAR *)source->state;
1822 +
1823 + /* allocate space */
1824 + copy = (struct inflate_state FAR *)
1825 + ZALLOC(source, 1, sizeof(struct inflate_state));
1826 + if (copy == Z_NULL) return Z_MEM_ERROR;
1827 + window = Z_NULL;
1828 + if (state->window != Z_NULL) {
1829 + window = (unsigned char FAR *)
1830 + ZALLOC(source, 1U << state->wbits, sizeof(unsigned char));
1831 + if (window == Z_NULL) {
1832 + ZFREE(source, copy);
1833 + return Z_MEM_ERROR;
1834 + }
1835 + }
1836 +
1837 + /* copy state */
1838 + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
1839 + zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state));
1840 + copy->strm = dest;
1841 + if (state->lencode >= state->codes &&
1842 + state->lencode <= state->codes + ENOUGH - 1) {
1843 + copy->lencode = copy->codes + (state->lencode - state->codes);
1844 + copy->distcode = copy->codes + (state->distcode - state->codes);
1845 + }
1846 + copy->next = copy->codes + (state->next - state->codes);
1847 + if (window != Z_NULL) {
1848 + wsize = 1U << state->wbits;
1849 + zmemcpy(window, state->window, wsize);
1850 + }
1851 + copy->window = window;
1852 + dest->state = (struct internal_state FAR *)copy;
1853 + return Z_OK;
1854 +}
1855 +
1856 +int ZEXPORT inflateUndermine(strm, subvert)
1857 +z_streamp strm;
1858 +int subvert;
1859 +{
1860 + struct inflate_state FAR *state;
1861 +
1862 + if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
1863 + state = (struct inflate_state FAR *)strm->state;
1864 +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
1865 + state->sane = !subvert;
1866 + return Z_OK;
1867 +#else
1868 + (void)subvert;
1869 + state->sane = 1;
1870 + return Z_DATA_ERROR;
1871 +#endif
1872 +}
1873 +
1874 +int ZEXPORT inflateValidate(strm, check)
1875 +z_streamp strm;
1876 +int check;
1877 +{
1878 + struct inflate_state FAR *state;
1879 +
1880 + if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
1881 + state = (struct inflate_state FAR *)strm->state;
1882 + if (check)
1883 + state->wrap |= 4;
1884 + else
1885 + state->wrap &= ~4;
1886 + return Z_OK;
1887 +}
1888 +
1889 +long ZEXPORT inflateMark(strm)
1890 +z_streamp strm;
1891 +{
1892 + struct inflate_state FAR *state;
1893 +
1894 + if (inflateStateCheck(strm))
1895 + return -(1L << 16);
1896 + state = (struct inflate_state FAR *)strm->state;
1897 + return (long)(((unsigned long)((long)state->back)) << 16) +
1898 + (state->mode == COPY ? state->length :
1899 + (state->mode == MATCH ? state->was - state->length : 0));
1900 +}
1901 +
1902 +unsigned long ZEXPORT inflateCodesUsed(strm)
1903 +z_streamp strm;
1904 +{
1905 + struct inflate_state FAR *state;
1906 + if (inflateStateCheck(strm)) return (unsigned long)-1;
1907 + state = (struct inflate_state FAR *)strm->state;
1908 + return (unsigned long)(state->next - state->codes);
1909 +}
1910
1911 From 247147654fe5cd11cf15d8dff91440405ea57040 Mon Sep 17 00:00:00 2001
1912 From: Simon Hosie <simon.hosie@arm.com>
1913 Date: Wed, 12 Apr 2017 15:44:21 -0700
1914 Subject: [PATCH 2/2] Inflate using wider loads and stores
1915
1916 In inflate_fast() the output pointer always has plenty of room to write. This
1917 means that so long as the target is capable, wide un-aligned loads and stores
1918 can be used to transfer several bytes at once. When the reference distance is
1919 too short simply unroll the data a little to increase the distance.
1920
1921 Change-Id: I59854eb25d2b1e43561c8a2afaf9175bf10cf674
1922 ---
1923 contrib/arm/chunkcopy.h | 279 ++++++++++++++++++++++++++++++++++++++++++++++++
1924 contrib/arm/inffast.c | 96 +++++++----------
1925 contrib/arm/inflate.c | 22 ++--
1926 3 files changed, 335 insertions(+), 62 deletions(-)
1927 create mode 100644 contrib/arm/chunkcopy.h
1928
1929 diff --git a/contrib/arm/chunkcopy.h b/contrib/arm/chunkcopy.h
1930 new file mode 100644
1931 index 00000000..2d6fd6f9
1932 --- /dev/null
1933 +++ b/contrib/arm/chunkcopy.h
1934 @@ -0,0 +1,279 @@
1935 +/* chunkcopy.h -- fast copies and sets
1936 + * Copyright (C) 2017 ARM, Inc.
1937 + * For conditions of distribution and use, see copyright notice in zlib.h
1938 + */
1939 +
1940 +#ifndef CHUNKCOPY_H
1941 +#define CHUNKCOPY_H
1942 +
1943 +#include "zutil.h"
1944 +#include <arm_neon.h>
1945 +
1946 +#if __STDC_VERSION__ >= 199901L
1947 +#define Z_RESTRICT restrict
1948 +#else
1949 +#define Z_RESTRICT
1950 +#endif
1951 +
1952 +typedef uint8x16_t chunkcopy_chunk_t;
1953 +#define CHUNKCOPY_CHUNK_SIZE sizeof(chunkcopy_chunk_t)
1954 +
1955 +/*
1956 + Ask the compiler to perform a wide, unaligned load with an machine
1957 + instruction appropriate for the chunkcopy_chunk_t type.
1958 + */
1959 +static inline chunkcopy_chunk_t loadchunk(const unsigned char FAR *s) {
1960 + chunkcopy_chunk_t c;
1961 + __builtin_memcpy(&c, s, sizeof(c));
1962 + return c;
1963 +}
1964 +
1965 +/*
1966 + Ask the compiler to perform a wide, unaligned store with an machine
1967 + instruction appropriate for the chunkcopy_chunk_t type.
1968 + */
1969 +static inline void storechunk(unsigned char FAR *d, chunkcopy_chunk_t c) {
1970 + __builtin_memcpy(d, &c, sizeof(c));
1971 +}
1972 +
1973 +/*
1974 + Perform a memcpy-like operation, but assume that length is non-zero and that
1975 + it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if
1976 + the length is shorter than this.
1977 +
1978 + It also guarantees that it will properly unroll the data if the distance
1979 + between `out` and `from` is at least CHUNKCOPY_CHUNK_SIZE, which we rely on
1980 + in chunkcopy_relaxed().
1981 +
1982 + Aside from better memory bus utilisation, this means that short copies
1983 + (CHUNKCOPY_CHUNK_SIZE bytes or fewer) will fall straight through the loop
1984 + without iteration, which will hopefully make the branch prediction more
1985 + reliable.
1986 + */
1987 +static inline unsigned char FAR *chunkcopy_core(unsigned char FAR *out,
1988 + const unsigned char FAR *from,
1989 + unsigned len) {
1990 + int bump = (--len % CHUNKCOPY_CHUNK_SIZE) + 1;
1991 + storechunk(out, loadchunk(from));
1992 + out += bump;
1993 + from += bump;
1994 + len /= CHUNKCOPY_CHUNK_SIZE;
1995 + while (len-- > 0) {
1996 + storechunk(out, loadchunk(from));
1997 + out += CHUNKCOPY_CHUNK_SIZE;
1998 + from += CHUNKCOPY_CHUNK_SIZE;
1999 + }
2000 + return out;
2001 +}
2002 +
2003 +/*
2004 + Like chunkcopy_core, but avoid writing beyond of legal output.
2005 +
2006 + Accepts an additional pointer to the end of safe output. A generic safe
2007 + copy would use (out + len), but it's normally the case that the end of the
2008 + output buffer is beyond the end of the current copy, and this can still be
2009 + exploited.
2010 + */
2011 +static inline unsigned char FAR *chunkcopy_core_safe(unsigned char FAR *out,
2012 + const unsigned char FAR * from,
2013 + unsigned len,
2014 + unsigned char FAR *limit) {
2015 + Assert(out + len <= limit, "chunk copy exceeds safety limit");
2016 + if (limit - out < CHUNKCOPY_CHUNK_SIZE) {
2017 + const unsigned char FAR * Z_RESTRICT rfrom = from;
2018 + if (len & 8) { __builtin_memcpy(out, rfrom, 8); out += 8; rfrom += 8; }
2019 + if (len & 4) { __builtin_memcpy(out, rfrom, 4); out += 4; rfrom += 4; }
2020 + if (len & 2) { __builtin_memcpy(out, rfrom, 2); out += 2; rfrom += 2; }
2021 + if (len & 1) { *out++ = *rfrom++; }
2022 + return out;
2023 + }
2024 + return chunkcopy_core(out, from, len);
2025 +}
2026 +
2027 +/*
2028 + Perform short copies until distance can be rewritten as being at least
2029 + CHUNKCOPY_CHUNK_SIZE.
2030 +
2031 + This assumes that it's OK to overwrite at least the first
2032 + 2*CHUNKCOPY_CHUNK_SIZE bytes of output even if the copy is shorter than
2033 + this. This assumption holds within inflate_fast() which starts every
2034 + iteration with at least 258 bytes of output space available (258 being the
2035 + maximum length output from a single token; see inffast.c).
2036 + */
2037 +static inline unsigned char FAR *chunkunroll_relaxed(unsigned char FAR *out,
2038 + unsigned FAR *dist,
2039 + unsigned FAR *len) {
2040 + const unsigned char FAR *from = out - *dist;
2041 + while (*dist < *len && *dist < CHUNKCOPY_CHUNK_SIZE) {
2042 + storechunk(out, loadchunk(from));
2043 + out += *dist;
2044 + *len -= *dist;
2045 + *dist += *dist;
2046 + }
2047 + return out;
2048 +}
2049 +
2050 +
2051 +static inline uint8x16_t chunkset_vld1q_dup_u8x8(const unsigned char FAR * Z_RESTRICT from) {
2052 +#if defined(__clang__) || defined(__aarch64__)
2053 + return vreinterpretq_u8_u64(vld1q_dup_u64((void *)from));
2054 +#else
2055 + /* 32-bit GCC uses an alignment hint for vld1q_dup_u64, even when given a
2056 + * void pointer, so here's an alternate implementation.
2057 + */
2058 + uint8x8_t h = vld1_u8(from);
2059 + return vcombine_u8(h, h);
2060 +#endif
2061 +}
2062 +
2063 +/*
2064 + Perform an overlapping copy which behaves as a memset() operation, but
2065 + supporting periods other than one, and assume that length is non-zero and
2066 + that it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE*3 bytes of output
2067 + even if the length is shorter than this.
2068 + */
2069 +static inline unsigned char FAR *chunkset_core(unsigned char FAR *out,
2070 + unsigned period,
2071 + unsigned len) {
2072 + uint8x16_t f;
2073 + int bump = ((len - 1) % sizeof(f)) + 1;
2074 +
2075 + switch (period) {
2076 + case 1:
2077 + f = vld1q_dup_u8(out - 1);
2078 + vst1q_u8(out, f);
2079 + out += bump;
2080 + len -= bump;
2081 + while (len > 0) {
2082 + vst1q_u8(out, f);
2083 + out += sizeof(f);
2084 + len -= sizeof(f);
2085 + }
2086 + return out;
2087 + case 2:
2088 + f = vreinterpretq_u8_u16(vld1q_dup_u16((void *)(out - 2)));
2089 + vst1q_u8(out, f);
2090 + out += bump;
2091 + len -= bump;
2092 + if (len > 0) {
2093 + f = vreinterpretq_u8_u16(vld1q_dup_u16((void *)(out - 2)));
2094 + do {
2095 + vst1q_u8(out, f);
2096 + out += sizeof(f);
2097 + len -= sizeof(f);
2098 + } while (len > 0);
2099 + }
2100 + return out;
2101 + case 4:
2102 + f = vreinterpretq_u8_u32(vld1q_dup_u32((void *)(out - 4)));
2103 + vst1q_u8(out, f);
2104 + out += bump;
2105 + len -= bump;
2106 + if (len > 0) {
2107 + f = vreinterpretq_u8_u32(vld1q_dup_u32((void *)(out - 4)));
2108 + do {
2109 + vst1q_u8(out, f);
2110 + out += sizeof(f);
2111 + len -= sizeof(f);
2112 + } while (len > 0);
2113 + }
2114 + return out;
2115 + case 8:
2116 + f = chunkset_vld1q_dup_u8x8(out - 8);
2117 + vst1q_u8(out, f);
2118 + out += bump;
2119 + len -= bump;
2120 + if (len > 0) {
2121 + f = chunkset_vld1q_dup_u8x8(out - 8);
2122 + do {
2123 + vst1q_u8(out, f);
2124 + out += sizeof(f);
2125 + len -= sizeof(f);
2126 + } while (len > 0);
2127 + }
2128 + return out;
2129 + }
2130 + out = chunkunroll_relaxed(out, &period, &len);
2131 + return chunkcopy_core(out, out - period, len);
2132 +}
2133 +
2134 +/*
2135 + Perform a memcpy-like operation, but assume that length is non-zero and that
2136 + it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if
2137 + the length is shorter than this.
2138 +
2139 + Unlike chunkcopy_core() above, no guarantee is made regarding the behaviour
2140 + of overlapping buffers, regardless of the distance between the pointers.
2141 + This is reflected in the `restrict`-qualified pointers, allowing the
2142 + compiler to reorder loads and stores.
2143 + */
2144 +static inline unsigned char FAR *chunkcopy_relaxed(unsigned char FAR * Z_RESTRICT out,
2145 + const unsigned char FAR * Z_RESTRICT from,
2146 + unsigned len) {
2147 + return chunkcopy_core(out, from, len);
2148 +}
2149 +
2150 +/*
2151 + Like chunkcopy_relaxed, but avoid writing beyond of legal output.
2152 +
2153 + Unlike chunkcopy_core_safe() above, no guarantee is made regarding the
2154 + behaviour of overlapping buffers, regardless of the distance between the
2155 + pointers. This is reflected in the `restrict`-qualified pointers, allowing
2156 + the compiler to reorder loads and stores.
2157 +
2158 + Accepts an additional pointer to the end of safe output. A generic safe
2159 + copy would use (out + len), but it's normally the case that the end of the
2160 + output buffer is beyond the end of the current copy, and this can still be
2161 + exploited.
2162 + */
2163 +static inline unsigned char FAR *chunkcopy_safe(unsigned char FAR *out,
2164 + const unsigned char FAR * Z_RESTRICT from,
2165 + unsigned len,
2166 + unsigned char FAR *limit) {
2167 + Assert(out + len <= limit, "chunk copy exceeds safety limit");
2168 + return chunkcopy_core_safe(out, from, len, limit);
2169 +}
2170 +
2171 +/*
2172 + Perform chunky copy within the same buffer, where the source and destination
2173 + may potentially overlap.
2174 +
2175 + Assumes that len > 0 on entry, and that it's safe to write at least
2176 + CHUNKCOPY_CHUNK_SIZE*3 bytes to the output.
2177 + */
2178 +static inline unsigned char FAR *chunkcopy_lapped_relaxed(unsigned char FAR *out,
2179 + unsigned dist,
2180 + unsigned len) {
2181 + if (dist < len && dist < CHUNKCOPY_CHUNK_SIZE) {
2182 + return chunkset_core(out, dist, len);
2183 + }
2184 + return chunkcopy_core(out, out - dist, len);
2185 +}
2186 +
2187 +/*
2188 + Behave like chunkcopy_lapped_relaxed, but avoid writing beyond of legal output.
2189 +
2190 + Accepts an additional pointer to the end of safe output. A generic safe
2191 + copy would use (out + len), but it's normally the case that the end of the
2192 + output buffer is beyond the end of the current copy, and this can still be
2193 + exploited.
2194 + */
2195 +static inline unsigned char FAR *chunkcopy_lapped_safe(unsigned char FAR *out,
2196 + unsigned dist,
2197 + unsigned len,
2198 + unsigned char FAR *limit) {
2199 + Assert(out + len <= limit, "chunk copy exceeds safety limit");
2200 + if (limit - out < CHUNKCOPY_CHUNK_SIZE * 3) {
2201 + /* TODO: try harder to optimise this */
2202 + while (len-- > 0) {
2203 + *out = *(out - dist);
2204 + out++;
2205 + }
2206 + return out;
2207 + }
2208 + return chunkcopy_lapped_relaxed(out, dist, len);
2209 +}
2210 +
2211 +#undef Z_RESTRICT
2212 +
2213 +#endif /* CHUNKCOPY_H */
2214 diff --git a/contrib/arm/inffast.c b/contrib/arm/inffast.c
2215 index 0dbd1dbc..f7f50071 100644
2216 --- a/contrib/arm/inffast.c
2217 +++ b/contrib/arm/inffast.c
2218 @@ -7,6 +7,7 @@
2219 #include "inftrees.h"
2220 #include "inflate.h"
2221 #include "inffast.h"
2222 +#include "chunkcopy.h"
2223
2224 #ifdef ASMINF
2225 # pragma message("Assembler code may have bugs -- use at your own risk")
2226 @@ -57,6 +58,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
2227 unsigned char FAR *out; /* local strm->next_out */
2228 unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
2229 unsigned char FAR *end; /* while out < end, enough space available */
2230 + unsigned char FAR *limit; /* safety limit for chunky copies */
2231 #ifdef INFLATE_STRICT
2232 unsigned dmax; /* maximum distance from zlib header */
2233 #endif
2234 @@ -84,12 +86,13 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
2235 out = strm->next_out;
2236 beg = out - (start - strm->avail_out);
2237 end = out + (strm->avail_out - 257);
2238 + limit = out + strm->avail_out;
2239 #ifdef INFLATE_STRICT
2240 dmax = state->dmax;
2241 #endif
2242 wsize = state->wsize;
2243 whave = state->whave;
2244 - wnext = state->wnext;
2245 + wnext = (state->wnext == 0 && whave >= wsize) ? wsize : state->wnext;
2246 window = state->window;
2247 hold = state->hold;
2248 bits = state->bits;
2249 @@ -197,70 +200,51 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
2250 #endif
2251 }
2252 from = window;
2253 - if (wnext == 0) { /* very common case */
2254 - from += wsize - op;
2255 - if (op < len) { /* some from window */
2256 - len -= op;
2257 - do {
2258 - *out++ = *from++;
2259 - } while (--op);
2260 - from = out - dist; /* rest from output */
2261 - }
2262 + if (wnext >= op) { /* contiguous in window */
2263 + from += wnext - op;
2264 }
2265 - else if (wnext < op) { /* wrap around window */
2266 - from += wsize + wnext - op;
2267 + else { /* wrap around window */
2268 op -= wnext;
2269 + from += wsize - op;
2270 if (op < len) { /* some from end of window */
2271 len -= op;
2272 - do {
2273 - *out++ = *from++;
2274 - } while (--op);
2275 - from = window;
2276 - if (wnext < len) { /* some from start of window */
2277 - op = wnext;
2278 - len -= op;
2279 - do {
2280 - *out++ = *from++;
2281 - } while (--op);
2282 - from = out - dist; /* rest from output */
2283 - }
2284 + out = chunkcopy_safe(out, from, op, limit);
2285 + from = window; /* more from start of window */
2286 + op = wnext;
2287 + /* This (rare) case can create a situation where
2288 + the first chunkcopy below must be checked.
2289 + */
2290 }
2291 }
2292 - else { /* contiguous in window */
2293 - from += wnext - op;
2294 - if (op < len) { /* some from window */
2295 - len -= op;
2296 - do {
2297 - *out++ = *from++;
2298 - } while (--op);
2299 - from = out - dist; /* rest from output */
2300 - }
2301 - }
2302 - while (len > 2) {
2303 - *out++ = *from++;
2304 - *out++ = *from++;
2305 - *out++ = *from++;
2306 - len -= 3;
2307 - }
2308 - if (len) {
2309 - *out++ = *from++;
2310 - if (len > 1)
2311 - *out++ = *from++;
2312 + if (op < len) { /* still need some from output */
2313 + out = chunkcopy_safe(out, from, op, limit);
2314 + len -= op;
2315 + /* When dist is small the amount of data that can be
2316 + copied from the window is also small, and progress
2317 + towards the dangerous end of the output buffer is
2318 + also small. This means that for trivial memsets and
2319 + for chunkunroll_relaxed() a safety check is
2320 + unnecessary. However, these conditions may not be
2321 + entered at all, and in that case it's possible that
2322 + the main copy is near the end.
2323 + */
2324 + out = chunkunroll_relaxed(out, &dist, &len);
2325 + out = chunkcopy_safe(out, out - dist, len, limit);
2326 + } else {
2327 + /* from points to window, so there is no risk of
2328 + overlapping pointers requiring memset-like behaviour
2329 + */
2330 + out = chunkcopy_safe(out, from, len, limit);
2331 }
2332 }
2333 else {
2334 - from = out - dist; /* copy direct from output */
2335 - do { /* minimum length is three */
2336 - *out++ = *from++;
2337 - *out++ = *from++;
2338 - *out++ = *from++;
2339 - len -= 3;
2340 - } while (len > 2);
2341 - if (len) {
2342 - *out++ = *from++;
2343 - if (len > 1)
2344 - *out++ = *from++;
2345 - }
2346 + /* Whole reference is in range of current output. No
2347 + range checks are necessary because we start with room
2348 + for at least 258 bytes of output, so unroll and roundoff
2349 + operations can write beyond `out+len` so long as they
2350 + stay within 258 bytes of `out`.
2351 + */
2352 + out = chunkcopy_lapped_relaxed(out, dist, len);
2353 }
2354 }
2355 else if ((op & 64) == 0) { /* 2nd level distance code */
2356 diff --git a/contrib/arm/inflate.c b/contrib/arm/inflate.c
2357 index ac333e8c..e40322c3 100644
2358 --- a/contrib/arm/inflate.c
2359 +++ b/contrib/arm/inflate.c
2360 @@ -84,6 +84,7 @@
2361 #include "inftrees.h"
2362 #include "inflate.h"
2363 #include "inffast.h"
2364 +#include "contrib/arm/chunkcopy.h"
2365
2366 #ifdef MAKEFIXED
2367 # ifndef BUILDFIXED
2368 @@ -405,10 +406,20 @@ unsigned copy;
2369
2370 /* if it hasn't been done already, allocate space for the window */
2371 if (state->window == Z_NULL) {
2372 + unsigned wsize = 1U << state->wbits;
2373 state->window = (unsigned char FAR *)
2374 - ZALLOC(strm, 1U << state->wbits,
2375 + ZALLOC(strm, wsize + CHUNKCOPY_CHUNK_SIZE,
2376 sizeof(unsigned char));
2377 if (state->window == Z_NULL) return 1;
2378 +#ifdef INFLATE_CLEAR_UNUSED_UNDEFINED
2379 + /* Copies from the overflow portion of this buffer are undefined and
2380 + may cause analysis tools to raise a warning if we don't initialize
2381 + it. However, this undefined data overwrites other undefined data
2382 + and is subsequently either overwritten or left deliberately
2383 + undefined at the end of decode; so there's really no point.
2384 + */
2385 + memset(state->window + wsize, 0, CHUNKCOPY_CHUNK_SIZE);
2386 +#endif
2387 }
2388
2389 /* if window not in use yet, initialize */
2390 @@ -1175,17 +1186,16 @@ int flush;
2391 else
2392 from = state->window + (state->wnext - copy);
2393 if (copy > state->length) copy = state->length;
2394 + if (copy > left) copy = left;
2395 + put = chunkcopy_safe(put, from, copy, put + left);
2396 }
2397 else { /* copy from output */
2398 - from = put - state->offset;
2399 copy = state->length;
2400 + if (copy > left) copy = left;
2401 + put = chunkcopy_lapped_safe(put, state->offset, copy, put + left);
2402 }
2403 - if (copy > left) copy = left;
2404 left -= copy;
2405 state->length -= copy;
2406 - do {
2407 - *put++ = *from++;
2408 - } while (--copy);
2409 if (state->length == 0) state->mode = LEN;
2410 break;
2411 case LIT: