1 /*
  2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  3  *
  4  * This code is free software; you can redistribute it and/or modify it
  5  * under the terms of the GNU General Public License version 2 only, as
  6  * published by the Free Software Foundation.  Oracle designates this
  7  * particular file as subject to the "Classpath" exception as provided
  8  * by Oracle in the LICENSE file that accompanied this code.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  */
 24 
 25 /* gzread.c -- zlib functions for reading gzip files
 26  * Copyright (C) 2004-2017 Mark Adler
 27  * For conditions of distribution and use, see copyright notice in zlib.h
 28  */
 29 
 30 #include "gzguts.h"
 31 
 32 /* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
 33    state->fd, and update state->eof, state->err, and state->msg as appropriate.
 34    This function needs to loop on read(), since read() is not guaranteed to
 35    read the number of bytes requested, depending on the type of descriptor. */
 36 local int gz_load(gz_statep state, unsigned char *buf, unsigned len,
 37                   unsigned *have) {
 38     int ret;
 39     unsigned get, max = ((unsigned)-1 >> 2) + 1;
 40 
 41     *have = 0;
 42     do {
 43         get = len - *have;
 44         if (get > max)
 45             get = max;
 46         ret = read(state->fd, buf + *have, get);
 47         if (ret <= 0)
 48             break;
 49         *have += (unsigned)ret;
 50     } while (*have < len);
 51     if (ret < 0) {
 52         gz_error(state, Z_ERRNO, zstrerror());
 53         return -1;
 54     }
 55     if (ret == 0)
 56         state->eof = 1;
 57     return 0;
 58 }
 59 
 60 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
 61    error, 0 otherwise.  Note that the eof flag is set when the end of the input
 62    file is reached, even though there may be unused data in the buffer.  Once
 63    that data has been used, no more attempts will be made to read the file.
 64    If strm->avail_in != 0, then the current data is moved to the beginning of
 65    the input buffer, and then the remainder of the buffer is loaded with the
 66    available data from the input file. */
 67 local int gz_avail(gz_statep state) {
 68     unsigned got;
 69     z_streamp strm = &(state->strm);
 70 
 71     if (state->err != Z_OK && state->err != Z_BUF_ERROR)
 72         return -1;
 73     if (state->eof == 0) {
 74         if (strm->avail_in) {       /* copy what's there to the start */
 75             unsigned char *p = state->in;
 76             unsigned const char *q = strm->next_in;
 77             unsigned n = strm->avail_in;
 78             do {
 79                 *p++ = *q++;
 80             } while (--n);
 81         }
 82         if (gz_load(state, state->in + strm->avail_in,
 83                     state->size - strm->avail_in, &got) == -1)
 84             return -1;
 85         strm->avail_in += got;
 86         strm->next_in = state->in;
 87     }
 88     return 0;
 89 }
 90 
 91 /* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
 92    If this is the first time in, allocate required memory.  state->how will be
 93    left unchanged if there is no more input data available, will be set to COPY
 94    if there is no gzip header and direct copying will be performed, or it will
 95    be set to GZIP for decompression.  If direct copying, then leftover input
 96    data from the input buffer will be copied to the output buffer.  In that
 97    case, all further file reads will be directly to either the output buffer or
 98    a user buffer.  If decompressing, the inflate state will be initialized.
 99    gz_look() will return 0 on success or -1 on failure. */
100 local int gz_look(gz_statep state) {
101     z_streamp strm = &(state->strm);
102 
103     /* allocate read buffers and inflate memory */
104     if (state->size == 0) {
105         /* allocate buffers */
106         state->in = (unsigned char *)malloc(state->want);
107         state->out = (unsigned char *)malloc(state->want << 1);
108         if (state->in == NULL || state->out == NULL) {
109             free(state->out);
110             free(state->in);
111             gz_error(state, Z_MEM_ERROR, "out of memory");
112             return -1;
113         }
114         state->size = state->want;
115 
116         /* allocate inflate memory */
117         state->strm.zalloc = Z_NULL;
118         state->strm.zfree = Z_NULL;
119         state->strm.opaque = Z_NULL;
120         state->strm.avail_in = 0;
121         state->strm.next_in = Z_NULL;
122         if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
123             free(state->out);
124             free(state->in);
125             state->size = 0;
126             gz_error(state, Z_MEM_ERROR, "out of memory");
127             return -1;
128         }
129     }
130 
131     /* get at least the magic bytes in the input buffer */
132     if (strm->avail_in < 2) {
133         if (gz_avail(state) == -1)
134             return -1;
135         if (strm->avail_in == 0)
136             return 0;
137     }
138 
139     /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
140        a logical dilemma here when considering the case of a partially written
141        gzip file, to wit, if a single 31 byte is written, then we cannot tell
142        whether this is a single-byte file, or just a partially written gzip
143        file -- for here we assume that if a gzip file is being written, then
144        the header will be written in a single operation, so that reading a
145        single byte is sufficient indication that it is not a gzip file) */
146     if (strm->avail_in > 1 &&
147             strm->next_in[0] == 31 && strm->next_in[1] == 139) {
148         inflateReset(strm);
149         state->how = GZIP;
150         state->direct = 0;
151         return 0;
152     }
153 
154     /* no gzip header -- if we were decoding gzip before, then this is trailing
155        garbage.  Ignore the trailing garbage and finish. */
156     if (state->direct == 0) {
157         strm->avail_in = 0;
158         state->eof = 1;
159         state->x.have = 0;
160         return 0;
161     }
162 
163     /* doing raw i/o, copy any leftover input to output -- this assumes that
164        the output buffer is larger than the input buffer, which also assures
165        space for gzungetc() */
166     state->x.next = state->out;
167     memcpy(state->x.next, strm->next_in, strm->avail_in);
168     state->x.have = strm->avail_in;
169     strm->avail_in = 0;
170     state->how = COPY;
171     state->direct = 1;
172     return 0;
173 }
174 
175 /* Decompress from input to the provided next_out and avail_out in the state.
176    On return, state->x.have and state->x.next point to the just decompressed
177    data.  If the gzip stream completes, state->how is reset to LOOK to look for
178    the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
179    on success, -1 on failure. */
180 local int gz_decomp(gz_statep state) {
181     int ret = Z_OK;
182     unsigned had;
183     z_streamp strm = &(state->strm);
184 
185     /* fill output buffer up to end of deflate stream */
186     had = strm->avail_out;
187     do {
188         /* get more input for inflate() */
189         if (strm->avail_in == 0 && gz_avail(state) == -1)
190             return -1;
191         if (strm->avail_in == 0) {
192             gz_error(state, Z_BUF_ERROR, "unexpected end of file");
193             break;
194         }
195 
196         /* decompress and handle errors */
197         ret = inflate(strm, Z_NO_FLUSH);
198         if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
199             gz_error(state, Z_STREAM_ERROR,
200                      "internal error: inflate stream corrupt");
201             return -1;
202         }
203         if (ret == Z_MEM_ERROR) {
204             gz_error(state, Z_MEM_ERROR, "out of memory");
205             return -1;
206         }
207         if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
208             gz_error(state, Z_DATA_ERROR,
209                      strm->msg == NULL ? "compressed data error" : strm->msg);
210             return -1;
211         }
212     } while (strm->avail_out && ret != Z_STREAM_END);
213 
214     /* update available output */
215     state->x.have = had - strm->avail_out;
216     state->x.next = strm->next_out - state->x.have;
217 
218     /* if the gzip stream completed successfully, look for another */
219     if (ret == Z_STREAM_END)
220         state->how = LOOK;
221 
222     /* good decompression */
223     return 0;
224 }
225 
226 /* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
227    Data is either copied from the input file or decompressed from the input
228    file depending on state->how.  If state->how is LOOK, then a gzip header is
229    looked for to determine whether to copy or decompress.  Returns -1 on error,
230    otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
231    end of the input file has been reached and all data has been processed.  */
232 local int gz_fetch(gz_statep state) {
233     z_streamp strm = &(state->strm);
234 
235     do {
236         switch(state->how) {
237         case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
238             if (gz_look(state) == -1)
239                 return -1;
240             if (state->how == LOOK)
241                 return 0;
242             break;
243         case COPY:      /* -> COPY */
244             if (gz_load(state, state->out, state->size << 1, &(state->x.have))
245                     == -1)
246                 return -1;
247             state->x.next = state->out;
248             return 0;
249         case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
250             strm->avail_out = state->size << 1;
251             strm->next_out = state->out;
252             if (gz_decomp(state) == -1)
253                 return -1;
254         }
255     } while (state->x.have == 0 && (!state->eof || strm->avail_in));
256     return 0;
257 }
258 
259 /* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
260 local int gz_skip(gz_statep state, z_off64_t len) {
261     unsigned n;
262 
263     /* skip over len bytes or reach end-of-file, whichever comes first */
264     while (len)
265         /* skip over whatever is in output buffer */
266         if (state->x.have) {
267             n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
268                 (unsigned)len : state->x.have;
269             state->x.have -= n;
270             state->x.next += n;
271             state->x.pos += n;
272             len -= n;
273         }
274 
275         /* output buffer empty -- return if we're at the end of the input */
276         else if (state->eof && state->strm.avail_in == 0)
277             break;
278 
279         /* need more data to skip -- load up output buffer */
280         else {
281             /* get more output, looking for header if required */
282             if (gz_fetch(state) == -1)
283                 return -1;
284         }
285     return 0;
286 }
287 
288 /* Read len bytes into buf from file, or less than len up to the end of the
289    input.  Return the number of bytes read.  If zero is returned, either the
290    end of file was reached, or there was an error.  state->err must be
291    consulted in that case to determine which. */
292 local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len) {
293     z_size_t got;
294     unsigned n;
295 
296     /* if len is zero, avoid unnecessary operations */
297     if (len == 0)
298         return 0;
299 
300     /* process a skip request */
301     if (state->seek) {
302         state->seek = 0;
303         if (gz_skip(state, state->skip) == -1)
304             return 0;
305     }
306 
307     /* get len bytes to buf, or less than len if at the end */
308     got = 0;
309     do {
310         /* set n to the maximum amount of len that fits in an unsigned int */
311         n = (unsigned)-1;
312         if (n > len)
313             n = (unsigned)len;
314 
315         /* first just try copying data from the output buffer */
316         if (state->x.have) {
317             if (state->x.have < n)
318                 n = state->x.have;
319             memcpy(buf, state->x.next, n);
320             state->x.next += n;
321             state->x.have -= n;
322         }
323 
324         /* output buffer empty -- return if we're at the end of the input */
325         else if (state->eof && state->strm.avail_in == 0) {
326             state->past = 1;        /* tried to read past end */
327             break;
328         }
329 
330         /* need output data -- for small len or new stream load up our output
331            buffer */
332         else if (state->how == LOOK || n < (state->size << 1)) {
333             /* get more output, looking for header if required */
334             if (gz_fetch(state) == -1)
335                 return 0;
336             continue;       /* no progress yet -- go back to copy above */
337             /* the copy above assures that we will leave with space in the
338                output buffer, allowing at least one gzungetc() to succeed */
339         }
340 
341         /* large len -- read directly into user buffer */
342         else if (state->how == COPY) {      /* read directly */
343             if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
344                 return 0;
345         }
346 
347         /* large len -- decompress directly into user buffer */
348         else {  /* state->how == GZIP */
349             state->strm.avail_out = n;
350             state->strm.next_out = (unsigned char *)buf;
351             if (gz_decomp(state) == -1)
352                 return 0;
353             n = state->x.have;
354             state->x.have = 0;
355         }
356 
357         /* update progress */
358         len -= n;
359         buf = (char *)buf + n;
360         got += n;
361         state->x.pos += n;
362     } while (len);
363 
364     /* return number of bytes read into user buffer */
365     return got;
366 }
367 
368 /* -- see zlib.h -- */
369 int ZEXPORT gzread(gzFile file, voidp buf, unsigned len) {
370     gz_statep state;
371 
372     /* get internal structure */
373     if (file == NULL)
374         return -1;
375     state = (gz_statep)file;
376 
377     /* check that we're reading and that there's no (serious) error */
378     if (state->mode != GZ_READ ||
379             (state->err != Z_OK && state->err != Z_BUF_ERROR))
380         return -1;
381 
382     /* since an int is returned, make sure len fits in one, otherwise return
383        with an error (this avoids a flaw in the interface) */
384     if ((int)len < 0) {
385         gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
386         return -1;
387     }
388 
389     /* read len or fewer bytes to buf */
390     len = (unsigned)gz_read(state, buf, len);
391 
392     /* check for an error */
393     if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
394         return -1;
395 
396     /* return the number of bytes read (this is assured to fit in an int) */
397     return (int)len;
398 }
399 
400 /* -- see zlib.h -- */
401 z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, gzFile file) {
402     z_size_t len;
403     gz_statep state;
404 
405     /* get internal structure */
406     if (file == NULL)
407         return 0;
408     state = (gz_statep)file;
409 
410     /* check that we're reading and that there's no (serious) error */
411     if (state->mode != GZ_READ ||
412             (state->err != Z_OK && state->err != Z_BUF_ERROR))
413         return 0;
414 
415     /* compute bytes to read -- error on overflow */
416     len = nitems * size;
417     if (size && len / size != nitems) {
418         gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
419         return 0;
420     }
421 
422     /* read len or fewer bytes to buf, return the number of full items read */
423     return len ? gz_read(state, buf, len) / size : 0;
424 }
425 
426 /* -- see zlib.h -- */
427 #ifdef Z_PREFIX_SET
428 #  undef z_gzgetc
429 #else
430 #  undef gzgetc
431 #endif
432 int ZEXPORT gzgetc(gzFile file) {
433     unsigned char buf[1];
434     gz_statep state;
435 
436     /* get internal structure */
437     if (file == NULL)
438         return -1;
439     state = (gz_statep)file;
440 
441     /* check that we're reading and that there's no (serious) error */
442     if (state->mode != GZ_READ ||
443         (state->err != Z_OK && state->err != Z_BUF_ERROR))
444         return -1;
445 
446     /* try output buffer (no need to check for skip request) */
447     if (state->x.have) {
448         state->x.have--;
449         state->x.pos++;
450         return *(state->x.next)++;
451     }
452 
453     /* nothing there -- try gz_read() */
454     return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
455 }
456 
457 int ZEXPORT gzgetc_(gzFile file) {
458     return gzgetc(file);
459 }
460 
461 /* -- see zlib.h -- */
462 int ZEXPORT gzungetc(int c, gzFile file) {
463     gz_statep state;
464 
465     /* get internal structure */
466     if (file == NULL)
467         return -1;
468     state = (gz_statep)file;
469 
470     /* in case this was just opened, set up the input buffer */
471     if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
472         (void)gz_look(state);
473 
474     /* check that we're reading and that there's no (serious) error */
475     if (state->mode != GZ_READ ||
476         (state->err != Z_OK && state->err != Z_BUF_ERROR))
477         return -1;
478 
479     /* process a skip request */
480     if (state->seek) {
481         state->seek = 0;
482         if (gz_skip(state, state->skip) == -1)
483             return -1;
484     }
485 
486     /* can't push EOF */
487     if (c < 0)
488         return -1;
489 
490     /* if output buffer empty, put byte at end (allows more pushing) */
491     if (state->x.have == 0) {
492         state->x.have = 1;
493         state->x.next = state->out + (state->size << 1) - 1;
494         state->x.next[0] = (unsigned char)c;
495         state->x.pos--;
496         state->past = 0;
497         return c;
498     }
499 
500     /* if no room, give up (must have already done a gzungetc()) */
501     if (state->x.have == (state->size << 1)) {
502         gz_error(state, Z_DATA_ERROR, "out of room to push characters");
503         return -1;
504     }
505 
506     /* slide output data if needed and insert byte before existing data */
507     if (state->x.next == state->out) {
508         unsigned char *src = state->out + state->x.have;
509         unsigned char *dest = state->out + (state->size << 1);
510         while (src > state->out)
511             *--dest = *--src;
512         state->x.next = dest;
513     }
514     state->x.have++;
515     state->x.next--;
516     state->x.next[0] = (unsigned char)c;
517     state->x.pos--;
518     state->past = 0;
519     return c;
520 }
521 
522 /* -- see zlib.h -- */
523 char * ZEXPORT gzgets(gzFile file, char *buf, int len) {
524     unsigned left, n;
525     char *str;
526     unsigned char *eol;
527     gz_statep state;
528 
529     /* check parameters and get internal structure */
530     if (file == NULL || buf == NULL || len < 1)
531         return NULL;
532     state = (gz_statep)file;
533 
534     /* check that we're reading and that there's no (serious) error */
535     if (state->mode != GZ_READ ||
536         (state->err != Z_OK && state->err != Z_BUF_ERROR))
537         return NULL;
538 
539     /* process a skip request */
540     if (state->seek) {
541         state->seek = 0;
542         if (gz_skip(state, state->skip) == -1)
543             return NULL;
544     }
545 
546     /* copy output bytes up to new line or len - 1, whichever comes first --
547        append a terminating zero to the string (we don't check for a zero in
548        the contents, let the user worry about that) */
549     str = buf;
550     left = (unsigned)len - 1;
551     if (left) do {
552         /* assure that something is in the output buffer */
553         if (state->x.have == 0 && gz_fetch(state) == -1)
554             return NULL;                /* error */
555         if (state->x.have == 0) {       /* end of file */
556             state->past = 1;            /* read past end */
557             break;                      /* return what we have */
558         }
559 
560         /* look for end-of-line in current output buffer */
561         n = state->x.have > left ? left : state->x.have;
562         eol = (unsigned char *)memchr(state->x.next, '\n', n);
563         if (eol != NULL)
564             n = (unsigned)(eol - state->x.next) + 1;
565 
566         /* copy through end-of-line, or remainder if not found */
567         memcpy(buf, state->x.next, n);
568         state->x.have -= n;
569         state->x.next += n;
570         state->x.pos += n;
571         left -= n;
572         buf += n;
573     } while (left && eol == NULL);
574 
575     /* return terminated string, or if nothing, end of file */
576     if (buf == str)
577         return NULL;
578     buf[0] = 0;
579     return str;
580 }
581 
582 /* -- see zlib.h -- */
583 int ZEXPORT gzdirect(gzFile file) {
584     gz_statep state;
585 
586     /* get internal structure */
587     if (file == NULL)
588         return 0;
589     state = (gz_statep)file;
590 
591     /* if the state is not known, but we can find out, then do so (this is
592        mainly for right after a gzopen() or gzdopen()) */
593     if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
594         (void)gz_look(state);
595 
596     /* return 1 if transparent, 0 if processing a gzip stream */
597     return state->direct;
598 }
599 
600 /* -- see zlib.h -- */
601 int ZEXPORT gzclose_r(gzFile file) {
602     int ret, err;
603     gz_statep state;
604 
605     /* get internal structure */
606     if (file == NULL)
607         return Z_STREAM_ERROR;
608     state = (gz_statep)file;
609 
610     /* check that we're reading */
611     if (state->mode != GZ_READ)
612         return Z_STREAM_ERROR;
613 
614     /* free memory and close file */
615     if (state->size) {
616         inflateEnd(&(state->strm));
617         free(state->out);
618         free(state->in);
619     }
620     err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
621     gz_error(state, Z_OK, NULL);
622     free(state->path);
623     ret = close(state->fd);
624     free(state);
625     return ret ? Z_ERRNO : err;
626 }