001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.zip;
020
021import java.io.ByteArrayInputStream;
022import java.io.ByteArrayOutputStream;
023import java.io.EOFException;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.PushbackInputStream;
027import java.nio.ByteBuffer;
028import java.util.zip.CRC32;
029import java.util.zip.DataFormatException;
030import java.util.zip.Inflater;
031import java.util.zip.ZipEntry;
032import java.util.zip.ZipException;
033
034import org.apache.commons.compress.archivers.ArchiveEntry;
035import org.apache.commons.compress.archivers.ArchiveInputStream;
036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
037import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
038import org.apache.commons.compress.utils.ArchiveUtils;
039import org.apache.commons.compress.utils.IOUtils;
040
041import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
042import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
043import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
044import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
045
046/**
047 * Implements an input stream that can read Zip archives.
048 *
 * <p>As of Apache Commons Compress 1.3 it transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
052 *
053 * <p>The {@link ZipFile} class is preferred when reading from files
054 * as {@link ZipArchiveInputStream} is limited by not being able to
055 * read the central directory header before returning entries.  In
056 * particular {@link ZipArchiveInputStream}</p>
057 *
058 * <ul>
059 *
060 *  <li>may return entries that are not part of the central directory
061 *  at all and shouldn't be considered part of the archive.</li>
062 *
063 *  <li>may return several entries with the same name.</li>
064 *
065 *  <li>will not return internal or external attributes.</li>
066 *
067 *  <li>may return incomplete extra field data.</li>
068 *
069 *  <li>may return unknown sizes and CRC values for entries until the
070 *  next entry has been reached if the archive uses the data
071 *  descriptor feature.</li>
072 *
073 * </ul>
074 *
075 * @see ZipFile
076 * @NotThreadSafe
077 */
078public class ZipArchiveInputStream extends ArchiveInputStream {
079
080    /** The zip encoding to use for filenames and the file comment. */
081    private final ZipEncoding zipEncoding;
082
083    // the provided encoding (for unit tests)
084    final String encoding;
085
086    /** Whether to look for and use Unicode extra fields. */
087    private final boolean useUnicodeExtraFields;
088
089    /** Wrapped stream, will always be a PushbackInputStream. */
090    private final InputStream in;
091
092    /** Inflater used for all deflated entries. */
093    private final Inflater inf = new Inflater(true);
094
095    /** Buffer used to read from the wrapped stream. */
096    private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);
097
098    /** The entry that is currently being read. */
099    private CurrentEntry current = null;
100
101    /** Whether the stream has been closed. */
102    private boolean closed = false;
103
104    /** Whether the stream has reached the central directory - and thus found all entries. */
105    private boolean hitCentralDirectory = false;
106
107    /**
108     * When reading a stored entry that uses the data descriptor this
109     * stream has to read the full entry and caches it.  This is the
110     * cache.
111     */
112    private ByteArrayInputStream lastStoredEntry = null;
113
114    /** Whether the stream will try to read STORED entries that use a data descriptor. */
115    private boolean allowStoredEntriesWithDataDescriptor = false;
116
117    private static final int LFH_LEN = 30;
118    /*
119      local file header signature     WORD
120      version needed to extract       SHORT
121      general purpose bit flag        SHORT
122      compression method              SHORT
123      last mod file time              SHORT
124      last mod file date              SHORT
125      crc-32                          WORD
126      compressed size                 WORD
127      uncompressed size               WORD
128      file name length                SHORT
129      extra field length              SHORT
130    */
131
132    private static final int CFH_LEN = 46;
133    /*
134        central file header signature   WORD
135        version made by                 SHORT
136        version needed to extract       SHORT
137        general purpose bit flag        SHORT
138        compression method              SHORT
139        last mod file time              SHORT
140        last mod file date              SHORT
141        crc-32                          WORD
142        compressed size                 WORD
143        uncompressed size               WORD
144        file name length                SHORT
145        extra field length              SHORT
146        file comment length             SHORT
147        disk number start               SHORT
148        internal file attributes        SHORT
149        external file attributes        WORD
150        relative offset of local header WORD
151    */
152
153    private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;
154
155    // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
156    private final byte[] lfhBuf = new byte[LFH_LEN];
157    private final byte[] skipBuf = new byte[1024];
158    private final byte[] shortBuf = new byte[SHORT];
159    private final byte[] wordBuf = new byte[WORD];
160    private final byte[] twoDwordBuf = new byte[2 * DWORD];
161
162    private int entriesRead = 0;
163
164    /**
165     * Create an instance using UTF-8 encoding
166     * @param inputStream the stream to wrap
167     */
168    public ZipArchiveInputStream(final InputStream inputStream) {
169        this(inputStream, ZipEncodingHelper.UTF8);
170    }
171
172    /**
173     * Create an instance using the specified encoding
174     * @param inputStream the stream to wrap
175     * @param encoding the encoding to use for file names, use null
176     * for the platform's default encoding
177     * @since 1.5
178     */
179    public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
180        this(inputStream, encoding, true);
181    }
182
183    /**
184     * Create an instance using the specified encoding
185     * @param inputStream the stream to wrap
186     * @param encoding the encoding to use for file names, use null
187     * for the platform's default encoding
188     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
189     * Extra Fields (if present) to set the file names.
190     */
191    public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) {
192        this(inputStream, encoding, useUnicodeExtraFields, false);
193    }
194
195    /**
196     * Create an instance using the specified encoding
197     * @param inputStream the stream to wrap
198     * @param encoding the encoding to use for file names, use null
199     * for the platform's default encoding
200     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
201     * Extra Fields (if present) to set the file names.
202     * @param allowStoredEntriesWithDataDescriptor whether the stream
203     * will try to read STORED entries that use a data descriptor
204     * @since 1.1
205     */
206    public ZipArchiveInputStream(final InputStream inputStream,
207                                 final String encoding,
208                                 final boolean useUnicodeExtraFields,
209                                 final boolean allowStoredEntriesWithDataDescriptor) {
210        this.encoding = encoding;
211        zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
212        this.useUnicodeExtraFields = useUnicodeExtraFields;
213        in = new PushbackInputStream(inputStream, buf.capacity());
214        this.allowStoredEntriesWithDataDescriptor =
215            allowStoredEntriesWithDataDescriptor;
216        // haven't read anything so far
217        buf.limit(0);
218    }
219
220    public ZipArchiveEntry getNextZipEntry() throws IOException {
221        boolean firstEntry = true;
222        if (closed || hitCentralDirectory) {
223            return null;
224        }
225        if (current != null) {
226            closeEntry();
227            firstEntry = false;
228        }
229
230        long currentHeaderOffset = getBytesRead();
231        try {
232            if (firstEntry) {
233                // split archives have a special signature before the
234                // first local file header - look for it and fail with
235                // the appropriate error message if this is a split
236                // archive.
237                readFirstLocalFileHeader(lfhBuf);
238            } else {
239                readFully(lfhBuf);
240            }
241        } catch (final EOFException e) {
242            return null;
243        }
244
245        final ZipLong sig = new ZipLong(lfhBuf);
246        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
247            hitCentralDirectory = true;
248            skipRemainderOfArchive();
249            return null;
250        }
251        if (!sig.equals(ZipLong.LFH_SIG)) {
252            throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue()));
253        }
254
255        int off = WORD;
256        current = new CurrentEntry();
257
258        final int versionMadeBy = ZipShort.getValue(lfhBuf, off);
259        off += SHORT;
260        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);
261
262        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off);
263        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
264        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
265        current.hasDataDescriptor = gpFlag.usesDataDescriptor();
266        current.entry.setGeneralPurposeBit(gpFlag);
267
268        off += SHORT;
269
270        current.entry.setMethod(ZipShort.getValue(lfhBuf, off));
271        off += SHORT;
272
273        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off));
274        current.entry.setTime(time);
275        off += WORD;
276
277        ZipLong size = null, cSize = null;
278        if (!current.hasDataDescriptor) {
279            current.entry.setCrc(ZipLong.getValue(lfhBuf, off));
280            off += WORD;
281
282            cSize = new ZipLong(lfhBuf, off);
283            off += WORD;
284
285            size = new ZipLong(lfhBuf, off);
286            off += WORD;
287        } else {
288            off += 3 * WORD;
289        }
290
291        final int fileNameLen = ZipShort.getValue(lfhBuf, off);
292
293        off += SHORT;
294
295        final int extraLen = ZipShort.getValue(lfhBuf, off);
296        off += SHORT; // NOSONAR - assignment as documentation
297
298        final byte[] fileName = new byte[fileNameLen];
299        readFully(fileName);
300        current.entry.setName(entryEncoding.decode(fileName), fileName);
301        if (hasUTF8Flag) {
302            current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
303        }
304
305        final byte[] extraData = new byte[extraLen];
306        readFully(extraData);
307        current.entry.setExtra(extraData);
308
309        if (!hasUTF8Flag && useUnicodeExtraFields) {
310            ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
311        }
312
313        processZip64Extra(size, cSize);
314
315        current.entry.setLocalHeaderOffset(currentHeaderOffset);
316        current.entry.setDataOffset(getBytesRead());
317        current.entry.setStreamContiguous(true);
318
319        ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod());
320        if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) {
321            if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) {
322                InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize());
323                switch (m) {
324                case UNSHRINKING:
325                    current.in = new UnshrinkingInputStream(bis);
326                    break;
327                case IMPLODING:
328                    current.in = new ExplodingInputStream(
329                        current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
330                        current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
331                        bis);
332                    break;
333                case BZIP2:
334                    current.in = new BZip2CompressorInputStream(bis);
335                    break;
336                case ENHANCED_DEFLATED:
337                    current.in = new Deflate64CompressorInputStream(bis);
338                    break;
339                default:
340                    // we should never get here as all supported methods have been covered
341                    // will cause an error when read is invoked, don't throw an exception here so people can
342                    // skip unsupported entries
343                    break;
344                }
345            }
346        } else if (m == ZipMethod.ENHANCED_DEFLATED) {
347            current.in = new Deflate64CompressorInputStream(in);
348        }
349
350        entriesRead++;
351        return current.entry;
352    }
353
354    /**
355     * Fills the given array with the first local file header and
356     * deals with splitting/spanning markers that may prefix the first
357     * LFH.
358     */
359    private void readFirstLocalFileHeader(final byte[] lfh) throws IOException {
360        readFully(lfh);
361        final ZipLong sig = new ZipLong(lfh);
362        if (sig.equals(ZipLong.DD_SIG)) {
363            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
364        }
365
366        if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
367            // The archive is not really split as only one segment was
368            // needed in the end.  Just skip over the marker.
369            final byte[] missedLfhBytes = new byte[4];
370            readFully(missedLfhBytes);
371            System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
372            System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
373        }
374    }
375
376    /**
377     * Records whether a Zip64 extra is present and sets the size
378     * information from it if sizes are 0xFFFFFFFF and the entry
379     * doesn't use a data descriptor.
380     */
381    private void processZip64Extra(final ZipLong size, final ZipLong cSize) {
382        final Zip64ExtendedInformationExtraField z64 =
383            (Zip64ExtendedInformationExtraField)
384            current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
385        current.usesZip64 = z64 != null;
386        if (!current.hasDataDescriptor) {
387            if (z64 != null // same as current.usesZip64 but avoids NPE warning
388                    && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
389                current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
390                current.entry.setSize(z64.getSize().getLongValue());
391            } else {
392                current.entry.setCompressedSize(cSize.getValue());
393                current.entry.setSize(size.getValue());
394            }
395        }
396    }
397
398    @Override
399    public ArchiveEntry getNextEntry() throws IOException {
400        return getNextZipEntry();
401    }
402
403    /**
404     * Whether this class is able to read the given entry.
405     *
406     * <p>May return false if it is set up to use encryption or a
407     * compression method that hasn't been implemented yet.</p>
408     * @since 1.1
409     */
410    @Override
411    public boolean canReadEntryData(final ArchiveEntry ae) {
412        if (ae instanceof ZipArchiveEntry) {
413            final ZipArchiveEntry ze = (ZipArchiveEntry) ae;
414            return ZipUtil.canHandleEntryData(ze)
415                && supportsDataDescriptorFor(ze)
416                && supportsCompressedSizeFor(ze);
417        }
418        return false;
419    }
420
421    @Override
422    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
423        if (closed) {
424            throw new IOException("The stream is closed");
425        }
426
427        if (current == null) {
428            return -1;
429        }
430
431        // avoid int overflow, check null buffer
432        if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
433            throw new ArrayIndexOutOfBoundsException();
434        }
435
436        ZipUtil.checkRequestedFeatures(current.entry);
437        if (!supportsDataDescriptorFor(current.entry)) {
438            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
439                    current.entry);
440        }
441        if (!supportsCompressedSizeFor(current.entry)) {
442            throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE,
443                    current.entry);
444        }
445
446        int read;
447        if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
448            read = readStored(buffer, offset, length);
449        } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
450            read = readDeflated(buffer, offset, length);
451        } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
452                || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
453                || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
454                || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
455            read = current.in.read(buffer, offset, length);
456        } else {
457            throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
458                    current.entry);
459        }
460
461        if (read >= 0) {
462            current.crc.update(buffer, offset, read);
463        }
464
465        return read;
466    }
467
468    /**
469     * Implementation of read for STORED entries.
470     */
471    private int readStored(final byte[] buffer, final int offset, final int length) throws IOException {
472
473        if (current.hasDataDescriptor) {
474            if (lastStoredEntry == null) {
475                readStoredEntry();
476            }
477            return lastStoredEntry.read(buffer, offset, length);
478        }
479
480        final long csize = current.entry.getSize();
481        if (current.bytesRead >= csize) {
482            return -1;
483        }
484
485        if (buf.position() >= buf.limit()) {
486            buf.position(0);
487            final int l = in.read(buf.array());
488            if (l == -1) {
489                buf.limit(0);
490                throw new IOException("Truncated ZIP file");
491            }
492            buf.limit(l);
493
494            count(l);
495            current.bytesReadFromStream += l;
496        }
497
498        int toRead = Math.min(buf.remaining(), length);
499        if ((csize - current.bytesRead) < toRead) {
500            // if it is smaller than toRead then it fits into an int
501            toRead = (int) (csize - current.bytesRead);
502        }
503        buf.get(buffer, offset, toRead);
504        current.bytesRead += toRead;
505        return toRead;
506    }
507
508    /**
509     * Implementation of read for DEFLATED entries.
510     */
511    private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException {
512        final int read = readFromInflater(buffer, offset, length);
513        if (read <= 0) {
514            if (inf.finished()) {
515                return -1;
516            } else if (inf.needsDictionary()) {
517                throw new ZipException("This archive needs a preset dictionary"
518                                       + " which is not supported by Commons"
519                                       + " Compress.");
520            } else if (read == -1) {
521                throw new IOException("Truncated ZIP file");
522            }
523        }
524        return read;
525    }
526
527    /**
528     * Potentially reads more bytes to fill the inflater's buffer and
529     * reads from it.
530     */
531    private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException {
532        int read = 0;
533        do {
534            if (inf.needsInput()) {
535                final int l = fill();
536                if (l > 0) {
537                    current.bytesReadFromStream += buf.limit();
538                } else if (l == -1) {
539                    return -1;
540                } else {
541                    break;
542                }
543            }
544            try {
545                read = inf.inflate(buffer, offset, length);
546            } catch (final DataFormatException e) {
547                throw (IOException) new ZipException(e.getMessage()).initCause(e);
548            }
549        } while (read == 0 && inf.needsInput());
550        return read;
551    }
552
553    @Override
554    public void close() throws IOException {
555        if (!closed) {
556            closed = true;
557            try {
558                in.close();
559            } finally {
560                inf.end();
561            }
562        }
563    }
564
565    /**
566     * Skips over and discards value bytes of data from this input
567     * stream.
568     *
569     * <p>This implementation may end up skipping over some smaller
570     * number of bytes, possibly 0, if and only if it reaches the end
571     * of the underlying stream.</p>
572     *
573     * <p>The actual number of bytes skipped is returned.</p>
574     *
575     * @param value the number of bytes to be skipped.
576     * @return the actual number of bytes skipped.
577     * @throws IOException - if an I/O error occurs.
578     * @throws IllegalArgumentException - if value is negative.
579     */
580    @Override
581    public long skip(final long value) throws IOException {
582        if (value >= 0) {
583            long skipped = 0;
584            while (skipped < value) {
585                final long rem = value - skipped;
586                final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
587                if (x == -1) {
588                    return skipped;
589                }
590                skipped += x;
591            }
592            return skipped;
593        }
594        throw new IllegalArgumentException();
595    }
596
597    /**
598     * Checks if the signature matches what is expected for a zip file.
599     * Does not currently handle self-extracting zips which may have arbitrary
600     * leading content.
601     *
602     * @param signature the bytes to check
603     * @param length    the number of bytes to check
604     * @return true, if this stream is a zip archive stream, false otherwise
605     */
606    public static boolean matches(final byte[] signature, final int length) {
607        if (length < ZipArchiveOutputStream.LFH_SIG.length) {
608            return false;
609        }
610
611        return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
612            || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
613            || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
614            || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
615    }
616
617    private static boolean checksig(final byte[] signature, final byte[] expected) {
618        for (int i = 0; i < expected.length; i++) {
619            if (signature[i] != expected[i]) {
620                return false;
621            }
622        }
623        return true;
624    }
625
626    /**
627     * Closes the current ZIP archive entry and positions the underlying
628     * stream to the beginning of the next entry. All per-entry variables
629     * and data structures are cleared.
630     * <p>
631     * If the compressed size of this entry is included in the entry header,
632     * then any outstanding bytes are simply skipped from the underlying
633     * stream without uncompressing them. This allows an entry to be safely
634     * closed even if the compression method is unsupported.
635     * <p>
636     * In case we don't know the compressed size of this entry or have
637     * already buffered too much data from the underlying stream to support
638     * uncompression, then the uncompression process is completed and the
639     * end position of the stream is adjusted based on the result of that
640     * process.
641     *
642     * @throws IOException if an error occurs
643     */
644    private void closeEntry() throws IOException {
645        if (closed) {
646            throw new IOException("The stream is closed");
647        }
648        if (current == null) {
649            return;
650        }
651
652        // Ensure all entry bytes are read
653        if (currentEntryHasOutstandingBytes()) {
654            drainCurrentEntryData();
655        } else {
656            // this is guaranteed to exhaust the stream
657            skip(Long.MAX_VALUE); //NOSONAR
658
659            final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
660                       ? getBytesInflated() : current.bytesRead;
661
662            // this is at most a single read() operation and can't
663            // exceed the range of int
664            final int diff = (int) (current.bytesReadFromStream - inB);
665
666            // Pushback any required bytes
667            if (diff > 0) {
668                pushback(buf.array(), buf.limit() - diff, diff);
669                current.bytesReadFromStream -= diff;
670            }
671
672            // Drain remainder of entry if not all data bytes were required
673            if (currentEntryHasOutstandingBytes()) {
674                drainCurrentEntryData();
675            }
676        }
677
678        if (lastStoredEntry == null && current.hasDataDescriptor) {
679            readDataDescriptor();
680        }
681
682        inf.reset();
683        buf.clear().flip();
684        current = null;
685        lastStoredEntry = null;
686    }
687
688    /**
689     * If the compressed size of the current entry is included in the entry header
690     * and there are any outstanding bytes in the underlying stream, then
691     * this returns true.
692     *
693     * @return true, if current entry is determined to have outstanding bytes, false otherwise
694     */
695    private boolean currentEntryHasOutstandingBytes() {
696        return current.bytesReadFromStream <= current.entry.getCompressedSize()
697                && !current.hasDataDescriptor;
698    }
699
700    /**
701     * Read all data of the current entry from the underlying stream
702     * that hasn't been read, yet.
703     */
704    private void drainCurrentEntryData() throws IOException {
705        long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
706        while (remaining > 0) {
707            final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
708            if (n < 0) {
709                throw new EOFException("Truncated ZIP entry: "
710                                       + ArchiveUtils.sanitize(current.entry.getName()));
711            }
712            count(n);
713            remaining -= n;
714        }
715    }
716
717    /**
718     * Get the number of bytes Inflater has actually processed.
719     *
720     * <p>for Java &lt; Java7 the getBytes* methods in
721     * Inflater/Deflater seem to return unsigned ints rather than
722     * longs that start over with 0 at 2^32.</p>
723     *
724     * <p>The stream knows how many bytes it has read, but not how
725     * many the Inflater actually consumed - it should be between the
726     * total number of bytes read for the entry and the total number
727     * minus the last read operation.  Here we just try to make the
728     * value close enough to the bytes we've read by assuming the
729     * number of bytes consumed must be smaller than (or equal to) the
730     * number of bytes read but not smaller by more than 2^32.</p>
731     */
732    private long getBytesInflated() {
733        long inB = inf.getBytesRead();
734        if (current.bytesReadFromStream >= TWO_EXP_32) {
735            while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
736                inB += TWO_EXP_32;
737            }
738        }
739        return inB;
740    }
741
742    private int fill() throws IOException {
743        if (closed) {
744            throw new IOException("The stream is closed");
745        }
746        final int length = in.read(buf.array());
747        if (length > 0) {
748            buf.limit(length);
749            count(buf.limit());
750            inf.setInput(buf.array(), 0, buf.limit());
751        }
752        return length;
753    }
754
755    private void readFully(final byte[] b) throws IOException {
756        final int count = IOUtils.readFully(in, b);
757        count(count);
758        if (count < b.length) {
759            throw new EOFException();
760        }
761    }
762
763    private void readDataDescriptor() throws IOException {
764        readFully(wordBuf);
765        ZipLong val = new ZipLong(wordBuf);
766        if (ZipLong.DD_SIG.equals(val)) {
767            // data descriptor with signature, skip sig
768            readFully(wordBuf);
769            val = new ZipLong(wordBuf);
770        }
771        current.entry.setCrc(val.getValue());
772
773        // if there is a ZIP64 extra field, sizes are eight bytes
774        // each, otherwise four bytes each.  Unfortunately some
775        // implementations - namely Java7 - use eight bytes without
776        // using a ZIP64 extra field -
777        // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
778
779        // just read 16 bytes and check whether bytes nine to twelve
780        // look like one of the signatures of what could follow a data
781        // descriptor (ignoring archive decryption headers for now).
782        // If so, push back eight bytes and assume sizes are four
783        // bytes, otherwise sizes are eight bytes each.
784        readFully(twoDwordBuf);
785        final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD);
786        if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
787            pushback(twoDwordBuf, DWORD, DWORD);
788            current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf));
789            current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD));
790        } else {
791            current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf));
792            current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD));
793        }
794    }
795
796    /**
797     * Whether this entry requires a data descriptor this library can work with.
798     *
     * @return true if the entry doesn't use a data descriptor, if
     * the entry is STORED and allowStoredEntriesWithDataDescriptor is
     * true, or if the method is DEFLATED or ENHANCED_DEFLATED.
802     */
803    private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
804        return !entry.getGeneralPurposeBit().usesDataDescriptor()
805
806                || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
807                || entry.getMethod() == ZipEntry.DEFLATED
808                || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
809    }
810
811    /**
812     * Whether the compressed size for the entry is either known or
813     * not required by the compression method being used.
814     */
815    private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
816        return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN
817            || entry.getMethod() == ZipEntry.DEFLATED
818            || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
819            || (entry.getGeneralPurposeBit().usesDataDescriptor()
820                && allowStoredEntriesWithDataDescriptor
821                && entry.getMethod() == ZipEntry.STORED);
822    }
823
824    /**
825     * Caches a stored entry that uses the data descriptor.
826     *
827     * <ul>
828     *   <li>Reads a stored entry until the signature of a local file
829     *     header, central directory header or data descriptor has been
830     *     found.</li>
     *   <li>Stores all entry data in lastStoredEntry.</li>
832     *   <li>Rewinds the stream to position at the data
833     *     descriptor.</li>
834     *   <li>reads the data descriptor</li>
835     * </ul>
836     *
837     * <p>After calling this method the entry should know its size,
838     * the entry's data is cached and the stream is positioned at the
839     * next local file or central directory header.</p>
840     */
841    private void readStoredEntry() throws IOException {
842        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
843        int off = 0;
844        boolean done = false;
845
846        // length of DD without signature
847        final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
848
849        while (!done) {
850            final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
851            if (r <= 0) {
852                // read the whole archive without ever finding a
853                // central directory
854                throw new IOException("Truncated ZIP file");
855            }
856            if (r + off < 4) {
857                // buffer too small to check for a signature, loop
858                off += r;
859                continue;
860            }
861
862            done = bufferContainsSignature(bos, off, r, ddLen);
863            if (!done) {
864                off = cacheBytesRead(bos, off, r, ddLen);
865            }
866        }
867
868        final byte[] b = bos.toByteArray();
869        lastStoredEntry = new ByteArrayInputStream(b);
870    }
871
    // Byte representations of the signatures that bufferContainsSignature
    // compares byte by byte against the read buffer.

    /** Bytes of the local file header signature. */
    private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
    /** Bytes of the central directory file header signature. */
    private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
    /** Bytes of the data descriptor signature. */
    private static final byte[] DD = ZipLong.DD_SIG.getBytes();
875
876    /**
877     * Checks whether the current buffer contains the signature of a
878     * &quot;data descriptor&quot;, &quot;local file header&quot; or
879     * &quot;central directory entry&quot;.
880     *
881     * <p>If it contains such a signature, reads the data descriptor
882     * and positions the stream right after the data descriptor.</p>
883     */
884    private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen)
885            throws IOException {
886
887        boolean done = false;
888        int readTooMuch = 0;
889        for (int i = 0; !done && i < lastRead - 4; i++) {
890            if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
891                if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
892                    || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
893                    // found a LFH or CFH:
894                    readTooMuch = offset + lastRead - i - expectedDDLen;
895                    done = true;
896                }
897                else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
898                    // found DD:
899                    readTooMuch = offset + lastRead - i;
900                    done = true;
901                }
902                if (done) {
903                    // * push back bytes read in excess as well as the data
904                    //   descriptor
905                    // * copy the remaining bytes to cache
906                    // * read data descriptor
907                    pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
908                    bos.write(buf.array(), 0, i);
909                    readDataDescriptor();
910                }
911            }
912        }
913        return done;
914    }
915
916    /**
917     * If the last read bytes could hold a data descriptor and an
918     * incomplete signature then save the last bytes to the front of
919     * the buffer and cache everything in front of the potential data
920     * descriptor into the given ByteArrayOutputStream.
921     *
     * <p>Data descriptor plus incomplete signature (3 bytes in the
     * worst case) take up at most expecteDDLen + 3 bytes, which is
     * the number of bytes kept at the front of the buffer.</p>
924     */
925    private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) {
926        final int cacheable = offset + lastRead - expecteDDLen - 3;
927        if (cacheable > 0) {
928            bos.write(buf.array(), 0, cacheable);
929            System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
930            offset = expecteDDLen + 3;
931        } else {
932            offset += lastRead;
933        }
934        return offset;
935    }
936
937    private void pushback(final byte[] buf, final int offset, final int length) throws IOException {
938        ((PushbackInputStream) in).unread(buf, offset, length);
939        pushedBackBytes(length);
940    }
941
942    // End of Central Directory Record
943    //   end of central dir signature    WORD
944    //   number of this disk             SHORT
945    //   number of the disk with the
946    //   start of the central directory  SHORT
947    //   total number of entries in the
948    //   central directory on this disk  SHORT
949    //   total number of entries in
950    //   the central directory           SHORT
951    //   size of the central directory   WORD
952    //   offset of start of central
953    //   directory with respect to
954    //   the starting disk number        WORD
955    //   .ZIP file comment length        SHORT
956    //   .ZIP file comment               up to 64KB
957    //
958
959    /**
     * Reads the stream until it finds the "End of central directory
     * record" and consumes it as well.
962     */
963    private void skipRemainderOfArchive() throws IOException {
964        // skip over central directory. One LFH has been read too much
965        // already.  The calculation discounts file names and extra
966        // data so it will be too short.
967        realSkip((long) entriesRead * CFH_LEN - LFH_LEN);
968        findEocdRecord();
969        realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
970        readFully(shortBuf);
971        // file comment
972        realSkip(ZipShort.getValue(shortBuf));
973    }
974
975    /**
976     * Reads forward until the signature of the &quot;End of central
977     * directory&quot; record is found.
978     */
979    private void findEocdRecord() throws IOException {
980        int currentByte = -1;
981        boolean skipReadCall = false;
982        while (skipReadCall || (currentByte = readOneByte()) > -1) {
983            skipReadCall = false;
984            if (!isFirstByteOfEocdSig(currentByte)) {
985                continue;
986            }
987            currentByte = readOneByte();
988            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
989                if (currentByte == -1) {
990                    break;
991                }
992                skipReadCall = isFirstByteOfEocdSig(currentByte);
993                continue;
994            }
995            currentByte = readOneByte();
996            if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
997                if (currentByte == -1) {
998                    break;
999                }
1000                skipReadCall = isFirstByteOfEocdSig(currentByte);
1001                continue;
1002            }
1003            currentByte = readOneByte();
1004            if (currentByte == -1
1005                || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
1006                break;
1007            }
1008            skipReadCall = isFirstByteOfEocdSig(currentByte);
1009        }
1010    }
1011
1012    /**
1013     * Skips bytes by reading from the underlying stream rather than
1014     * the (potentially inflating) archive stream - which {@link
1015     * #skip} would do.
1016     *
1017     * Also updates bytes-read counter.
1018     */
1019    private void realSkip(final long value) throws IOException {
1020        if (value >= 0) {
1021            long skipped = 0;
1022            while (skipped < value) {
1023                final long rem = value - skipped;
1024                final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length));
1025                if (x == -1) {
1026                    return;
1027                }
1028                count(x);
1029                skipped += x;
1030            }
1031            return;
1032        }
1033        throw new IllegalArgumentException();
1034    }
1035
1036    /**
1037     * Reads bytes by reading from the underlying stream rather than
1038     * the (potentially inflating) archive stream - which {@link #read} would do.
1039     *
1040     * Also updates bytes-read counter.
1041     */
1042    private int readOneByte() throws IOException {
1043        final int b = in.read();
1044        if (b != -1) {
1045            count(1);
1046        }
1047        return b;
1048    }
1049
1050    private boolean isFirstByteOfEocdSig(final int b) {
1051        return b == ZipArchiveOutputStream.EOCD_SIG[0];
1052    }
1053
1054    /**
1055     * Structure collecting information for the entry that is
1056     * currently being read.
1057     */
    private static final class CurrentEntry {

        /**
         * Current ZIP entry.
         */
        private final ZipArchiveEntry entry = new ZipArchiveEntry();

        /**
         * Does the entry use a data descriptor?
         */
        private boolean hasDataDescriptor;

        /**
         * Does the entry have a ZIP64 extended information extra field?
         */
        private boolean usesZip64;

        /**
         * Number of bytes of entry content read by the client if the
         * entry is STORED.
         */
        private long bytesRead;

        /**
         * Number of bytes of entry content read so far from the stream.
         *
         * <p>This may be more than the actual entry's length as some
         * stuff gets buffered up and needs to be pushed back when the
         * end of the entry has been reached.</p>
         */
        private long bytesReadFromStream;

        /**
         * The checksum calculated as the current entry is read.
         */
        private final CRC32 crc = new CRC32();

        /**
         * The input stream decompressing the data for shrunk and imploded entries.
         */
        private InputStream in;
    }
1100
1101    /**
1102     * Bounded input stream adapted from commons-io
1103     */
1104    private class BoundedInputStream extends InputStream {
1105
1106        /** the wrapped input stream */
1107        private final InputStream in;
1108
1109        /** the max length to provide */
1110        private final long max;
1111
1112        /** the number of bytes already returned */
1113        private long pos = 0;
1114
1115        /**
1116         * Creates a new <code>BoundedInputStream</code> that wraps the given input
1117         * stream and limits it to a certain size.
1118         *
1119         * @param in The wrapped input stream
1120         * @param size The maximum number of bytes to return
1121         */
1122        public BoundedInputStream(final InputStream in, final long size) {
1123            this.max = size;
1124            this.in = in;
1125        }
1126
1127        @Override
1128        public int read() throws IOException {
1129            if (max >= 0 && pos >= max) {
1130                return -1;
1131            }
1132            final int result = in.read();
1133            pos++;
1134            count(1);
1135            current.bytesReadFromStream++;
1136            return result;
1137        }
1138
1139        @Override
1140        public int read(final byte[] b) throws IOException {
1141            return this.read(b, 0, b.length);
1142        }
1143
1144        @Override
1145        public int read(final byte[] b, final int off, final int len) throws IOException {
1146            if (max >= 0 && pos >= max) {
1147                return -1;
1148            }
1149            final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1150            final int bytesRead = in.read(b, off, (int) maxRead);
1151
1152            if (bytesRead == -1) {
1153                return -1;
1154            }
1155
1156            pos += bytesRead;
1157            count(bytesRead);
1158            current.bytesReadFromStream += bytesRead;
1159            return bytesRead;
1160        }
1161
1162        @Override
1163        public long skip(final long n) throws IOException {
1164            final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1165            final long skippedBytes = in.skip(toSkip);
1166            pos += skippedBytes;
1167            return skippedBytes;
1168        }
1169
1170        @Override
1171        public int available() throws IOException {
1172            if (max >= 0 && pos >= max) {
1173                return 0;
1174            }
1175            return in.available();
1176        }
1177    }
1178}