1 /*
  2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  3  *
  4  * This code is free software; you can redistribute it and/or modify it
  5  * under the terms of the GNU General Public License version 2 only, as
  6  * published by the Free Software Foundation.  Oracle designates this
  7  * particular file as subject to the "Classpath" exception as provided
  8  * by Oracle in the LICENSE file that accompanied this code.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  */
 24 
 25 /* inftrees.c -- generate Huffman trees for efficient decoding
 26  * Copyright (C) 1995-2022 Mark Adler
 27  * For conditions of distribution and use, see copyright notice in zlib.h
 28  */
 29 
 30 #include "zutil.h"
 31 #include "inftrees.h"
 32 
 33 #define MAXBITS 15
 34 
 35 const char inflate_copyright[] =
 36    " inflate 1.2.13 Copyright 1995-2022 Mark Adler ";
 37 /*
 38   If you use the zlib library in a product, an acknowledgment is welcome
 39   in the documentation of your product. If for some reason you cannot
 40   include such an acknowledgment, I would appreciate that you keep this
 41   copyright string in the executable of your product.
 42  */
 43 
 44 /*
 45    Build a set of tables to decode the provided canonical Huffman code.
 46    The code lengths are lens[0..codes-1].  The result starts at *table,
 47    whose indices are 0..2^bits-1.  work is a writable array of at least
 48    lens shorts, which is used as a work area.  type is the type of code
 49    to be generated, CODES, LENS, or DISTS.  On return, zero is success,
 50    -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
 51    on return points to the next available entry's address.  bits is the
 52    requested root table index bits, and on return it is the actual root
 53    table index bits.  It will differ if the request is greater than the
 54    longest code or if it is less than the shortest code.
 55  */
 56 int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work)
 57 codetype type;
 58 unsigned short FAR *lens;
 59 unsigned codes;
 60 code FAR * FAR *table;
 61 unsigned FAR *bits;
 62 unsigned short FAR *work;
 63 {
 64     unsigned len;               /* a code's length in bits */
 65     unsigned sym;               /* index of code symbols */
 66     unsigned min, max;          /* minimum and maximum code lengths */
 67     unsigned root;              /* number of index bits for root table */
 68     unsigned curr;              /* number of index bits for current table */
 69     unsigned drop;              /* code bits to drop for sub-table */
 70     int left;                   /* number of prefix codes available */
 71     unsigned used;              /* code entries in table used */
 72     unsigned huff;              /* Huffman code */
 73     unsigned incr;              /* for incrementing code, index */
 74     unsigned fill;              /* index for replicating entries */
 75     unsigned low;               /* low bits for current root entry */
 76     unsigned mask;              /* mask for low root bits */
 77     code here;                  /* table entry for duplication */
 78     code FAR *next;             /* next available space in table */
 79     const unsigned short FAR *base;     /* base value table to use */
 80     const unsigned short FAR *extra;    /* extra bits table to use */
 81     unsigned match;             /* use base and extra for symbol >= match */
 82     unsigned short count[MAXBITS+1];    /* number of codes of each length */
 83     unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
 84     static const unsigned short lbase[31] = { /* Length codes 257..285 base */
 85         3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
 86         35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
 87     static const unsigned short lext[31] = { /* Length codes 257..285 extra */
 88         16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
 89         19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 194, 65};
 90     static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
 91         1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
 92         257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
 93         8193, 12289, 16385, 24577, 0, 0};
 94     static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
 95         16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
 96         23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
 97         28, 28, 29, 29, 64, 64};
 98 
 99     /*
100        Process a set of code lengths to create a canonical Huffman code.  The
101        code lengths are lens[0..codes-1].  Each length corresponds to the
102        symbols 0..codes-1.  The Huffman code is generated by first sorting the
103        symbols by length from short to long, and retaining the symbol order
104        for codes with equal lengths.  Then the code starts with all zero bits
105        for the first code of the shortest length, and the codes are integer
106        increments for the same length, and zeros are appended as the length
107        increases.  For the deflate format, these bits are stored backwards
108        from their more natural integer increment ordering, and so when the
109        decoding tables are built in the large loop below, the integer codes
110        are incremented backwards.
111 
112        This routine assumes, but does not check, that all of the entries in
113        lens[] are in the range 0..MAXBITS.  The caller must assure this.
114        1..MAXBITS is interpreted as that code length.  zero means that that
115        symbol does not occur in this code.
116 
117        The codes are sorted by computing a count of codes for each length,
118        creating from that a table of starting indices for each length in the
119        sorted table, and then entering the symbols in order in the sorted
120        table.  The sorted table is work[], with that space being provided by
121        the caller.
122 
123        The length counts are used for other purposes as well, i.e. finding
124        the minimum and maximum length codes, determining if there are any
125        codes at all, checking for a valid set of lengths, and looking ahead
126        at length counts to determine sub-table sizes when building the
127        decoding tables.
128      */
129 
130     /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
131     for (len = 0; len <= MAXBITS; len++)
132         count[len] = 0;
133     for (sym = 0; sym < codes; sym++)
134         count[lens[sym]]++;
135 
136     /* bound code lengths, force root to be within code lengths */
137     root = *bits;
138     for (max = MAXBITS; max >= 1; max--)
139         if (count[max] != 0) break;
140     if (root > max) root = max;
141     if (max == 0) {                     /* no symbols to code at all */
142         here.op = (unsigned char)64;    /* invalid code marker */
143         here.bits = (unsigned char)1;
144         here.val = (unsigned short)0;
145         *(*table)++ = here;             /* make a table to force an error */
146         *(*table)++ = here;
147         *bits = 1;
148         return 0;     /* no symbols, but wait for decoding to report error */
149     }
150     for (min = 1; min < max; min++)
151         if (count[min] != 0) break;
152     if (root < min) root = min;
153 
154     /* check for an over-subscribed or incomplete set of lengths */
155     left = 1;
156     for (len = 1; len <= MAXBITS; len++) {
157         left <<= 1;
158         left -= count[len];
159         if (left < 0) return -1;        /* over-subscribed */
160     }
161     if (left > 0 && (type == CODES || max != 1))
162         return -1;                      /* incomplete set */
163 
164     /* generate offsets into symbol table for each length for sorting */
165     offs[1] = 0;
166     for (len = 1; len < MAXBITS; len++)
167         offs[len + 1] = offs[len] + count[len];
168 
169     /* sort symbols by length, by symbol order within each length */
170     for (sym = 0; sym < codes; sym++)
171         if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
172 
173     /*
174        Create and fill in decoding tables.  In this loop, the table being
175        filled is at next and has curr index bits.  The code being used is huff
176        with length len.  That code is converted to an index by dropping drop
177        bits off of the bottom.  For codes where len is less than drop + curr,
178        those top drop + curr - len bits are incremented through all values to
179        fill the table with replicated entries.
180 
181        root is the number of index bits for the root table.  When len exceeds
182        root, sub-tables are created pointed to by the root entry with an index
183        of the low root bits of huff.  This is saved in low to check for when a
184        new sub-table should be started.  drop is zero when the root table is
185        being filled, and drop is root when sub-tables are being filled.
186 
187        When a new sub-table is needed, it is necessary to look ahead in the
188        code lengths to determine what size sub-table is needed.  The length
189        counts are used for this, and so count[] is decremented as codes are
190        entered in the tables.
191 
192        used keeps track of how many table entries have been allocated from the
193        provided *table space.  It is checked for LENS and DIST tables against
194        the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
195        the initial root table size constants.  See the comments in inftrees.h
196        for more information.
197 
198        sym increments through all symbols, and the loop terminates when
199        all codes of length max, i.e. all codes, have been processed.  This
200        routine permits incomplete codes, so another loop after this one fills
201        in the rest of the decoding tables with invalid code markers.
202      */
203 
204     /* set up for code type */
205     switch (type) {
206     case CODES:
207         base = extra = work;    /* dummy value--not used */
208         match = 20;
209         break;
210     case LENS:
211         base = lbase;
212         extra = lext;
213         match = 257;
214         break;
215     default:    /* DISTS */
216         base = dbase;
217         extra = dext;
218         match = 0;
219     }
220 
221     /* initialize state for loop */
222     huff = 0;                   /* starting code */
223     sym = 0;                    /* starting code symbol */
224     len = min;                  /* starting code length */
225     next = *table;              /* current table to fill in */
226     curr = root;                /* current table index bits */
227     drop = 0;                   /* current bits to drop from code for index */
228     low = (unsigned)(-1);       /* trigger new sub-table when len > root */
229     used = 1U << root;          /* use root table entries */
230     mask = used - 1;            /* mask for comparing low */
231 
232     /* check available table space */
233     if ((type == LENS && used > ENOUGH_LENS) ||
234         (type == DISTS && used > ENOUGH_DISTS))
235         return 1;
236 
237     /* process all codes and make table entries */
238     for (;;) {
239         /* create table entry */
240         here.bits = (unsigned char)(len - drop);
241         if (work[sym] + 1U < match) {
242             here.op = (unsigned char)0;
243             here.val = work[sym];
244         }
245         else if (work[sym] >= match) {
246             here.op = (unsigned char)(extra[work[sym] - match]);
247             here.val = base[work[sym] - match];
248         }
249         else {
250             here.op = (unsigned char)(32 + 64);         /* end of block */
251             here.val = 0;
252         }
253 
254         /* replicate for those indices with low len bits equal to huff */
255         incr = 1U << (len - drop);
256         fill = 1U << curr;
257         min = fill;                 /* save offset to next table */
258         do {
259             fill -= incr;
260             next[(huff >> drop) + fill] = here;
261         } while (fill != 0);
262 
263         /* backwards increment the len-bit code huff */
264         incr = 1U << (len - 1);
265         while (huff & incr)
266             incr >>= 1;
267         if (incr != 0) {
268             huff &= incr - 1;
269             huff += incr;
270         }
271         else
272             huff = 0;
273 
274         /* go to next symbol, update count, len */
275         sym++;
276         if (--(count[len]) == 0) {
277             if (len == max) break;
278             len = lens[work[sym]];
279         }
280 
281         /* create new sub-table if needed */
282         if (len > root && (huff & mask) != low) {
283             /* if first time, transition to sub-tables */
284             if (drop == 0)
285                 drop = root;
286 
287             /* increment past last table */
288             next += min;            /* here min is 1 << curr */
289 
290             /* determine length of next table */
291             curr = len - drop;
292             left = (int)(1 << curr);
293             while (curr + drop < max) {
294                 left -= count[curr + drop];
295                 if (left <= 0) break;
296                 curr++;
297                 left <<= 1;
298             }
299 
300             /* check for enough space */
301             used += 1U << curr;
302             if ((type == LENS && used > ENOUGH_LENS) ||
303                 (type == DISTS && used > ENOUGH_DISTS))
304                 return 1;
305 
306             /* point entry in root table to sub-table */
307             low = huff & mask;
308             (*table)[low].op = (unsigned char)curr;
309             (*table)[low].bits = (unsigned char)root;
310             (*table)[low].val = (unsigned short)(next - *table);
311         }
312     }
313 
314     /* fill in remaining table entry if code is incomplete (guaranteed to have
315        at most one remaining entry, since if the code is incomplete, the
316        maximum code length that was allowed to get this far is one bit) */
317     if (huff != 0) {
318         here.op = (unsigned char)64;            /* invalid code marker */
319         here.bits = (unsigned char)(len - drop);
320         here.val = (unsigned short)0;
321         next[huff] = here;
322     }
323 
324     /* set return parameters */
325     *table += used;
326     *bits = root;
327     return 0;
328 }