1 /*
  2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  3  *
  4  * This code is free software; you can redistribute it and/or modify it
  5  * under the terms of the GNU General Public License version 2 only, as
  6  * published by the Free Software Foundation.  Oracle designates this
  7  * particular file as subject to the "Classpath" exception as provided
  8  * by Oracle in the LICENSE file that accompanied this code.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  */
 24 
 25 /* gzread.c -- zlib functions for reading gzip files
 26  * Copyright (C) 2004-2017 Mark Adler
 27  * For conditions of distribution and use, see copyright notice in zlib.h
 28  */
 29 
 30 #include "gzguts.h"
 31 
 32 /* Local functions */
 33 local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
 34 local int gz_avail OF((gz_statep));
 35 local int gz_look OF((gz_statep));
 36 local int gz_decomp OF((gz_statep));
 37 local int gz_fetch OF((gz_statep));
 38 local int gz_skip OF((gz_statep, z_off64_t));
 39 local z_size_t gz_read OF((gz_statep, voidp, z_size_t));
 40 
 41 /* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
 42    state->fd, and update state->eof, state->err, and state->msg as appropriate.
 43    This function needs to loop on read(), since read() is not guaranteed to
 44    read the number of bytes requested, depending on the type of descriptor. */
 45 local int gz_load(state, buf, len, have)
 46     gz_statep state;
 47     unsigned char *buf;
 48     unsigned len;
 49     unsigned *have;
 50 {
 51     int ret;
 52     unsigned get, max = ((unsigned)-1 >> 2) + 1;
 53 
 54     *have = 0;
 55     do {
 56         get = len - *have;
 57         if (get > max)
 58             get = max;
 59         ret = read(state->fd, buf + *have, get);
 60         if (ret <= 0)
 61             break;
 62         *have += (unsigned)ret;
 63     } while (*have < len);
 64     if (ret < 0) {
 65         gz_error(state, Z_ERRNO, zstrerror());
 66         return -1;
 67     }
 68     if (ret == 0)
 69         state->eof = 1;
 70     return 0;
 71 }
 72 
 73 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
 74    error, 0 otherwise.  Note that the eof flag is set when the end of the input
 75    file is reached, even though there may be unused data in the buffer.  Once
 76    that data has been used, no more attempts will be made to read the file.
 77    If strm->avail_in != 0, then the current data is moved to the beginning of
 78    the input buffer, and then the remainder of the buffer is loaded with the
 79    available data from the input file. */
 80 local int gz_avail(state)
 81     gz_statep state;
 82 {
 83     unsigned got;
 84     z_streamp strm = &(state->strm);
 85 
 86     if (state->err != Z_OK && state->err != Z_BUF_ERROR)
 87         return -1;
 88     if (state->eof == 0) {
 89         if (strm->avail_in) {       /* copy what's there to the start */
 90             unsigned char *p = state->in;
 91             unsigned const char *q = strm->next_in;
 92             unsigned n = strm->avail_in;
 93             do {
 94                 *p++ = *q++;
 95             } while (--n);
 96         }
 97         if (gz_load(state, state->in + strm->avail_in,
 98                     state->size - strm->avail_in, &got) == -1)
 99             return -1;
100         strm->avail_in += got;
101         strm->next_in = state->in;
102     }
103     return 0;
104 }
105 
106 /* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
107    If this is the first time in, allocate required memory.  state->how will be
108    left unchanged if there is no more input data available, will be set to COPY
109    if there is no gzip header and direct copying will be performed, or it will
110    be set to GZIP for decompression.  If direct copying, then leftover input
111    data from the input buffer will be copied to the output buffer.  In that
112    case, all further file reads will be directly to either the output buffer or
113    a user buffer.  If decompressing, the inflate state will be initialized.
114    gz_look() will return 0 on success or -1 on failure. */
115 local int gz_look(state)
116     gz_statep state;
117 {
118     z_streamp strm = &(state->strm);
119 
120     /* allocate read buffers and inflate memory */
121     if (state->size == 0) {
122         /* allocate buffers */
123         state->in = (unsigned char *)malloc(state->want);
124         state->out = (unsigned char *)malloc(state->want << 1);
125         if (state->in == NULL || state->out == NULL) {
126             free(state->out);
127             free(state->in);
128             gz_error(state, Z_MEM_ERROR, "out of memory");
129             return -1;
130         }
131         state->size = state->want;
132 
133         /* allocate inflate memory */
134         state->strm.zalloc = Z_NULL;
135         state->strm.zfree = Z_NULL;
136         state->strm.opaque = Z_NULL;
137         state->strm.avail_in = 0;
138         state->strm.next_in = Z_NULL;
139         if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
140             free(state->out);
141             free(state->in);
142             state->size = 0;
143             gz_error(state, Z_MEM_ERROR, "out of memory");
144             return -1;
145         }
146     }
147 
148     /* get at least the magic bytes in the input buffer */
149     if (strm->avail_in < 2) {
150         if (gz_avail(state) == -1)
151             return -1;
152         if (strm->avail_in == 0)
153             return 0;
154     }
155 
156     /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
157        a logical dilemma here when considering the case of a partially written
158        gzip file, to wit, if a single 31 byte is written, then we cannot tell
159        whether this is a single-byte file, or just a partially written gzip
160        file -- for here we assume that if a gzip file is being written, then
161        the header will be written in a single operation, so that reading a
162        single byte is sufficient indication that it is not a gzip file) */
163     if (strm->avail_in > 1 &&
164             strm->next_in[0] == 31 && strm->next_in[1] == 139) {
165         inflateReset(strm);
166         state->how = GZIP;
167         state->direct = 0;
168         return 0;
169     }
170 
171     /* no gzip header -- if we were decoding gzip before, then this is trailing
172        garbage.  Ignore the trailing garbage and finish. */
173     if (state->direct == 0) {
174         strm->avail_in = 0;
175         state->eof = 1;
176         state->x.have = 0;
177         return 0;
178     }
179 
180     /* doing raw i/o, copy any leftover input to output -- this assumes that
181        the output buffer is larger than the input buffer, which also assures
182        space for gzungetc() */
183     state->x.next = state->out;
184     memcpy(state->x.next, strm->next_in, strm->avail_in);
185     state->x.have = strm->avail_in;
186     strm->avail_in = 0;
187     state->how = COPY;
188     state->direct = 1;
189     return 0;
190 }
191 
192 /* Decompress from input to the provided next_out and avail_out in the state.
193    On return, state->x.have and state->x.next point to the just decompressed
194    data.  If the gzip stream completes, state->how is reset to LOOK to look for
195    the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
196    on success, -1 on failure. */
197 local int gz_decomp(state)
198     gz_statep state;
199 {
200     int ret = Z_OK;
201     unsigned had;
202     z_streamp strm = &(state->strm);
203 
204     /* fill output buffer up to end of deflate stream */
205     had = strm->avail_out;
206     do {
207         /* get more input for inflate() */
208         if (strm->avail_in == 0 && gz_avail(state) == -1)
209             return -1;
210         if (strm->avail_in == 0) {
211             gz_error(state, Z_BUF_ERROR, "unexpected end of file");
212             break;
213         }
214 
215         /* decompress and handle errors */
216         ret = inflate(strm, Z_NO_FLUSH);
217         if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
218             gz_error(state, Z_STREAM_ERROR,
219                      "internal error: inflate stream corrupt");
220             return -1;
221         }
222         if (ret == Z_MEM_ERROR) {
223             gz_error(state, Z_MEM_ERROR, "out of memory");
224             return -1;
225         }
226         if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
227             gz_error(state, Z_DATA_ERROR,
228                      strm->msg == NULL ? "compressed data error" : strm->msg);
229             return -1;
230         }
231     } while (strm->avail_out && ret != Z_STREAM_END);
232 
233     /* update available output */
234     state->x.have = had - strm->avail_out;
235     state->x.next = strm->next_out - state->x.have;
236 
237     /* if the gzip stream completed successfully, look for another */
238     if (ret == Z_STREAM_END)
239         state->how = LOOK;
240 
241     /* good decompression */
242     return 0;
243 }
244 
245 /* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
246    Data is either copied from the input file or decompressed from the input
247    file depending on state->how.  If state->how is LOOK, then a gzip header is
248    looked for to determine whether to copy or decompress.  Returns -1 on error,
249    otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
250    end of the input file has been reached and all data has been processed.  */
251 local int gz_fetch(state)
252     gz_statep state;
253 {
254     z_streamp strm = &(state->strm);
255 
256     do {
257         switch(state->how) {
258         case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
259             if (gz_look(state) == -1)
260                 return -1;
261             if (state->how == LOOK)
262                 return 0;
263             break;
264         case COPY:      /* -> COPY */
265             if (gz_load(state, state->out, state->size << 1, &(state->x.have))
266                     == -1)
267                 return -1;
268             state->x.next = state->out;
269             return 0;
270         case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
271             strm->avail_out = state->size << 1;
272             strm->next_out = state->out;
273             if (gz_decomp(state) == -1)
274                 return -1;
275         }
276     } while (state->x.have == 0 && (!state->eof || strm->avail_in));
277     return 0;
278 }
279 
280 /* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
281 local int gz_skip(state, len)
282     gz_statep state;
283     z_off64_t len;
284 {
285     unsigned n;
286 
287     /* skip over len bytes or reach end-of-file, whichever comes first */
288     while (len)
289         /* skip over whatever is in output buffer */
290         if (state->x.have) {
291             n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
292                 (unsigned)len : state->x.have;
293             state->x.have -= n;
294             state->x.next += n;
295             state->x.pos += n;
296             len -= n;
297         }
298 
299         /* output buffer empty -- return if we're at the end of the input */
300         else if (state->eof && state->strm.avail_in == 0)
301             break;
302 
303         /* need more data to skip -- load up output buffer */
304         else {
305             /* get more output, looking for header if required */
306             if (gz_fetch(state) == -1)
307                 return -1;
308         }
309     return 0;
310 }
311 
312 /* Read len bytes into buf from file, or less than len up to the end of the
313    input.  Return the number of bytes read.  If zero is returned, either the
314    end of file was reached, or there was an error.  state->err must be
315    consulted in that case to determine which. */
316 local z_size_t gz_read(state, buf, len)
317     gz_statep state;
318     voidp buf;
319     z_size_t len;
320 {
321     z_size_t got;
322     unsigned n;
323 
324     /* if len is zero, avoid unnecessary operations */
325     if (len == 0)
326         return 0;
327 
328     /* process a skip request */
329     if (state->seek) {
330         state->seek = 0;
331         if (gz_skip(state, state->skip) == -1)
332             return 0;
333     }
334 
335     /* get len bytes to buf, or less than len if at the end */
336     got = 0;
337     do {
338         /* set n to the maximum amount of len that fits in an unsigned int */
339         n = (unsigned)-1;
340         if (n > len)
341             n = (unsigned)len;
342 
343         /* first just try copying data from the output buffer */
344         if (state->x.have) {
345             if (state->x.have < n)
346                 n = state->x.have;
347             memcpy(buf, state->x.next, n);
348             state->x.next += n;
349             state->x.have -= n;
350         }
351 
352         /* output buffer empty -- return if we're at the end of the input */
353         else if (state->eof && state->strm.avail_in == 0) {
354             state->past = 1;        /* tried to read past end */
355             break;
356         }
357 
358         /* need output data -- for small len or new stream load up our output
359            buffer */
360         else if (state->how == LOOK || n < (state->size << 1)) {
361             /* get more output, looking for header if required */
362             if (gz_fetch(state) == -1)
363                 return 0;
364             continue;       /* no progress yet -- go back to copy above */
365             /* the copy above assures that we will leave with space in the
366                output buffer, allowing at least one gzungetc() to succeed */
367         }
368 
369         /* large len -- read directly into user buffer */
370         else if (state->how == COPY) {      /* read directly */
371             if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
372                 return 0;
373         }
374 
375         /* large len -- decompress directly into user buffer */
376         else {  /* state->how == GZIP */
377             state->strm.avail_out = n;
378             state->strm.next_out = (unsigned char *)buf;
379             if (gz_decomp(state) == -1)
380                 return 0;
381             n = state->x.have;
382             state->x.have = 0;
383         }
384 
385         /* update progress */
386         len -= n;
387         buf = (char *)buf + n;
388         got += n;
389         state->x.pos += n;
390     } while (len);
391 
392     /* return number of bytes read into user buffer */
393     return got;
394 }
395 
396 /* -- see zlib.h -- */
397 int ZEXPORT gzread(file, buf, len)
398     gzFile file;
399     voidp buf;
400     unsigned len;
401 {
402     gz_statep state;
403 
404     /* get internal structure */
405     if (file == NULL)
406         return -1;
407     state = (gz_statep)file;
408 
409     /* check that we're reading and that there's no (serious) error */
410     if (state->mode != GZ_READ ||
411             (state->err != Z_OK && state->err != Z_BUF_ERROR))
412         return -1;
413 
414     /* since an int is returned, make sure len fits in one, otherwise return
415        with an error (this avoids a flaw in the interface) */
416     if ((int)len < 0) {
417         gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
418         return -1;
419     }
420 
421     /* read len or fewer bytes to buf */
422     len = (unsigned)gz_read(state, buf, len);
423 
424     /* check for an error */
425     if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
426         return -1;
427 
428     /* return the number of bytes read (this is assured to fit in an int) */
429     return (int)len;
430 }
431 
432 /* -- see zlib.h -- */
433 z_size_t ZEXPORT gzfread(buf, size, nitems, file)
434     voidp buf;
435     z_size_t size;
436     z_size_t nitems;
437     gzFile file;
438 {
439     z_size_t len;
440     gz_statep state;
441 
442     /* get internal structure */
443     if (file == NULL)
444         return 0;
445     state = (gz_statep)file;
446 
447     /* check that we're reading and that there's no (serious) error */
448     if (state->mode != GZ_READ ||
449             (state->err != Z_OK && state->err != Z_BUF_ERROR))
450         return 0;
451 
452     /* compute bytes to read -- error on overflow */
453     len = nitems * size;
454     if (size && len / size != nitems) {
455         gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
456         return 0;
457     }
458 
459     /* read len or fewer bytes to buf, return the number of full items read */
460     return len ? gz_read(state, buf, len) / size : 0;
461 }
462 
463 /* -- see zlib.h -- */
464 #ifdef Z_PREFIX_SET
465 #  undef z_gzgetc
466 #else
467 #  undef gzgetc
468 #endif
469 int ZEXPORT gzgetc(file)
470     gzFile file;
471 {
472     unsigned char buf[1];
473     gz_statep state;
474 
475     /* get internal structure */
476     if (file == NULL)
477         return -1;
478     state = (gz_statep)file;
479 
480     /* check that we're reading and that there's no (serious) error */
481     if (state->mode != GZ_READ ||
482         (state->err != Z_OK && state->err != Z_BUF_ERROR))
483         return -1;
484 
485     /* try output buffer (no need to check for skip request) */
486     if (state->x.have) {
487         state->x.have--;
488         state->x.pos++;
489         return *(state->x.next)++;
490     }
491 
492     /* nothing there -- try gz_read() */
493     return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
494 }
495 
496 int ZEXPORT gzgetc_(file)
497 gzFile file;
498 {
499     return gzgetc(file);
500 }
501 
502 /* -- see zlib.h -- */
503 int ZEXPORT gzungetc(c, file)
504     int c;
505     gzFile file;
506 {
507     gz_statep state;
508 
509     /* get internal structure */
510     if (file == NULL)
511         return -1;
512     state = (gz_statep)file;
513 
514     /* check that we're reading and that there's no (serious) error */
515     if (state->mode != GZ_READ ||
516         (state->err != Z_OK && state->err != Z_BUF_ERROR))
517         return -1;
518 
519     /* process a skip request */
520     if (state->seek) {
521         state->seek = 0;
522         if (gz_skip(state, state->skip) == -1)
523             return -1;
524     }
525 
526     /* can't push EOF */
527     if (c < 0)
528         return -1;
529 
530     /* if output buffer empty, put byte at end (allows more pushing) */
531     if (state->x.have == 0) {
532         state->x.have = 1;
533         state->x.next = state->out + (state->size << 1) - 1;
534         state->x.next[0] = (unsigned char)c;
535         state->x.pos--;
536         state->past = 0;
537         return c;
538     }
539 
540     /* if no room, give up (must have already done a gzungetc()) */
541     if (state->x.have == (state->size << 1)) {
542         gz_error(state, Z_DATA_ERROR, "out of room to push characters");
543         return -1;
544     }
545 
546     /* slide output data if needed and insert byte before existing data */
547     if (state->x.next == state->out) {
548         unsigned char *src = state->out + state->x.have;
549         unsigned char *dest = state->out + (state->size << 1);
550         while (src > state->out)
551             *--dest = *--src;
552         state->x.next = dest;
553     }
554     state->x.have++;
555     state->x.next--;
556     state->x.next[0] = (unsigned char)c;
557     state->x.pos--;
558     state->past = 0;
559     return c;
560 }
561 
562 /* -- see zlib.h -- */
563 char * ZEXPORT gzgets(file, buf, len)
564     gzFile file;
565     char *buf;
566     int len;
567 {
568     unsigned left, n;
569     char *str;
570     unsigned char *eol;
571     gz_statep state;
572 
573     /* check parameters and get internal structure */
574     if (file == NULL || buf == NULL || len < 1)
575         return NULL;
576     state = (gz_statep)file;
577 
578     /* check that we're reading and that there's no (serious) error */
579     if (state->mode != GZ_READ ||
580         (state->err != Z_OK && state->err != Z_BUF_ERROR))
581         return NULL;
582 
583     /* process a skip request */
584     if (state->seek) {
585         state->seek = 0;
586         if (gz_skip(state, state->skip) == -1)
587             return NULL;
588     }
589 
590     /* copy output bytes up to new line or len - 1, whichever comes first --
591        append a terminating zero to the string (we don't check for a zero in
592        the contents, let the user worry about that) */
593     str = buf;
594     left = (unsigned)len - 1;
595     if (left) do {
596         /* assure that something is in the output buffer */
597         if (state->x.have == 0 && gz_fetch(state) == -1)
598             return NULL;                /* error */
599         if (state->x.have == 0) {       /* end of file */
600             state->past = 1;            /* read past end */
601             break;                      /* return what we have */
602         }
603 
604         /* look for end-of-line in current output buffer */
605         n = state->x.have > left ? left : state->x.have;
606         eol = (unsigned char *)memchr(state->x.next, '\n', n);
607         if (eol != NULL)
608             n = (unsigned)(eol - state->x.next) + 1;
609 
610         /* copy through end-of-line, or remainder if not found */
611         memcpy(buf, state->x.next, n);
612         state->x.have -= n;
613         state->x.next += n;
614         state->x.pos += n;
615         left -= n;
616         buf += n;
617     } while (left && eol == NULL);
618 
619     /* return terminated string, or if nothing, end of file */
620     if (buf == str)
621         return NULL;
622     buf[0] = 0;
623     return str;
624 }
625 
626 /* -- see zlib.h -- */
627 int ZEXPORT gzdirect(file)
628     gzFile file;
629 {
630     gz_statep state;
631 
632     /* get internal structure */
633     if (file == NULL)
634         return 0;
635     state = (gz_statep)file;
636 
637     /* if the state is not known, but we can find out, then do so (this is
638        mainly for right after a gzopen() or gzdopen()) */
639     if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
640         (void)gz_look(state);
641 
642     /* return 1 if transparent, 0 if processing a gzip stream */
643     return state->direct;
644 }
645 
646 /* -- see zlib.h -- */
647 int ZEXPORT gzclose_r(file)
648     gzFile file;
649 {
650     int ret, err;
651     gz_statep state;
652 
653     /* get internal structure */
654     if (file == NULL)
655         return Z_STREAM_ERROR;
656     state = (gz_statep)file;
657 
658     /* check that we're reading */
659     if (state->mode != GZ_READ)
660         return Z_STREAM_ERROR;
661 
662     /* free memory and close file */
663     if (state->size) {
664         inflateEnd(&(state->strm));
665         free(state->out);
666         free(state->in);
667     }
668     err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
669     gz_error(state, Z_OK, NULL);
670     free(state->path);
671     ret = close(state->fd);
672     free(state);
673     return ret ? Z_ERRNO : err;
674 }