001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.zip; 020 021import java.io.ByteArrayInputStream; 022import java.io.ByteArrayOutputStream; 023import java.io.EOFException; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.PushbackInputStream; 027import java.nio.ByteBuffer; 028import java.util.zip.CRC32; 029import java.util.zip.DataFormatException; 030import java.util.zip.Inflater; 031import java.util.zip.ZipEntry; 032import java.util.zip.ZipException; 033 034import org.apache.commons.compress.archivers.ArchiveEntry; 035import org.apache.commons.compress.archivers.ArchiveInputStream; 036import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 037import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 038import org.apache.commons.compress.utils.ArchiveUtils; 039import org.apache.commons.compress.utils.IOUtils; 040 041import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 042import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 043import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 044import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 045 046/** 047 * Implements an input stream that can read Zip archives. 048 * 049 * <p>As of Apache Commons Compress it transparently supports Zip64 050 * extensions and thus individual entries and archives larger than 4 051 * GB or with more than 65536 entries.</p> 052 * 053 * <p>The {@link ZipFile} class is preferred when reading from files 054 * as {@link ZipArchiveInputStream} is limited by not being able to 055 * read the central directory header before returning entries. In 056 * particular {@link ZipArchiveInputStream}</p> 057 * 058 * <ul> 059 * 060 * <li>may return entries that are not part of the central directory 061 * at all and shouldn't be considered part of the archive.</li> 062 * 063 * <li>may return several entries with the same name.</li> 064 * 065 * <li>will not return internal or external attributes.</li> 066 * 067 * <li>may return incomplete extra field data.</li> 068 * 069 * <li>may return unknown sizes and CRC values for entries until the 070 * next entry has been reached if the archive uses the data 071 * descriptor feature.</li> 072 * 073 * </ul> 074 * 075 * @see ZipFile 076 * @NotThreadSafe 077 */ 078public class ZipArchiveInputStream extends ArchiveInputStream { 079 080 /** The zip encoding to use for filenames and the file comment. */ 081 private final ZipEncoding zipEncoding; 082 083 // the provided encoding (for unit tests) 084 final String encoding; 085 086 /** Whether to look for and use Unicode extra fields. */ 087 private final boolean useUnicodeExtraFields; 088 089 /** Wrapped stream, will always be a PushbackInputStream. */ 090 private final InputStream in; 091 092 /** Inflater used for all deflated entries. */ 093 private final Inflater inf = new Inflater(true); 094 095 /** Buffer used to read from the wrapped stream. */ 096 private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE); 097 098 /** The entry that is currently being read. */ 099 private CurrentEntry current = null; 100 101 /** Whether the stream has been closed. */ 102 private boolean closed = false; 103 104 /** Whether the stream has reached the central directory - and thus found all entries. */ 105 private boolean hitCentralDirectory = false; 106 107 /** 108 * When reading a stored entry that uses the data descriptor this 109 * stream has to read the full entry and caches it. This is the 110 * cache. 111 */ 112 private ByteArrayInputStream lastStoredEntry = null; 113 114 /** Whether the stream will try to read STORED entries that use a data descriptor. */ 115 private boolean allowStoredEntriesWithDataDescriptor = false; 116 117 private static final int LFH_LEN = 30; 118 /* 119 local file header signature WORD 120 version needed to extract SHORT 121 general purpose bit flag SHORT 122 compression method SHORT 123 last mod file time SHORT 124 last mod file date SHORT 125 crc-32 WORD 126 compressed size WORD 127 uncompressed size WORD 128 file name length SHORT 129 extra field length SHORT 130 */ 131 132 private static final int CFH_LEN = 46; 133 /* 134 central file header signature WORD 135 version made by SHORT 136 version needed to extract SHORT 137 general purpose bit flag SHORT 138 compression method SHORT 139 last mod file time SHORT 140 last mod file date SHORT 141 crc-32 WORD 142 compressed size WORD 143 uncompressed size WORD 144 file name length SHORT 145 extra field length SHORT 146 file comment length SHORT 147 disk number start SHORT 148 internal file attributes SHORT 149 external file attributes WORD 150 relative offset of local header WORD 151 */ 152 153 private static final long TWO_EXP_32 = ZIP64_MAGIC + 1; 154 155 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 156 private final byte[] lfhBuf = new byte[LFH_LEN]; 157 private final byte[] skipBuf = new byte[1024]; 158 private final byte[] shortBuf = new byte[SHORT]; 159 private final byte[] wordBuf = new byte[WORD]; 160 private final byte[] twoDwordBuf = new byte[2 * DWORD]; 161 162 private int entriesRead = 0; 163 164 /** 165 * Create an instance using UTF-8 encoding 166 * @param inputStream the stream to wrap 167 */ 168 public ZipArchiveInputStream(final InputStream inputStream) { 169 this(inputStream, ZipEncodingHelper.UTF8); 170 } 171 172 /** 173 * Create an instance using the specified encoding 174 * @param inputStream the stream to wrap 175 * @param encoding the encoding to use for file names, use null 176 * for the platform's default encoding 177 * @since 1.5 178 */ 179 public ZipArchiveInputStream(final InputStream inputStream, final String encoding) { 180 this(inputStream, encoding, true); 181 } 182 183 /** 184 * Create an instance using the specified encoding 185 * @param inputStream the stream to wrap 186 * @param encoding the encoding to use for file names, use null 187 * for the platform's default encoding 188 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 189 * Extra Fields (if present) to set the file names. 190 */ 191 public ZipArchiveInputStream(final InputStream inputStream, final String encoding, final boolean useUnicodeExtraFields) { 192 this(inputStream, encoding, useUnicodeExtraFields, false); 193 } 194 195 /** 196 * Create an instance using the specified encoding 197 * @param inputStream the stream to wrap 198 * @param encoding the encoding to use for file names, use null 199 * for the platform's default encoding 200 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 201 * Extra Fields (if present) to set the file names. 202 * @param allowStoredEntriesWithDataDescriptor whether the stream 203 * will try to read STORED entries that use a data descriptor 204 * @since 1.1 205 */ 206 public ZipArchiveInputStream(final InputStream inputStream, 207 final String encoding, 208 final boolean useUnicodeExtraFields, 209 final boolean allowStoredEntriesWithDataDescriptor) { 210 this.encoding = encoding; 211 zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 212 this.useUnicodeExtraFields = useUnicodeExtraFields; 213 in = new PushbackInputStream(inputStream, buf.capacity()); 214 this.allowStoredEntriesWithDataDescriptor = 215 allowStoredEntriesWithDataDescriptor; 216 // haven't read anything so far 217 buf.limit(0); 218 } 219 220 public ZipArchiveEntry getNextZipEntry() throws IOException { 221 boolean firstEntry = true; 222 if (closed || hitCentralDirectory) { 223 return null; 224 } 225 if (current != null) { 226 closeEntry(); 227 firstEntry = false; 228 } 229 230 long currentHeaderOffset = getBytesRead(); 231 try { 232 if (firstEntry) { 233 // split archives have a special signature before the 234 // first local file header - look for it and fail with 235 // the appropriate error message if this is a split 236 // archive. 237 readFirstLocalFileHeader(lfhBuf); 238 } else { 239 readFully(lfhBuf); 240 } 241 } catch (final EOFException e) { 242 return null; 243 } 244 245 final ZipLong sig = new ZipLong(lfhBuf); 246 if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) { 247 hitCentralDirectory = true; 248 skipRemainderOfArchive(); 249 return null; 250 } 251 if (!sig.equals(ZipLong.LFH_SIG)) { 252 throw new ZipException(String.format("Unexpected record signature: 0X%X", sig.getValue())); 253 } 254 255 int off = WORD; 256 current = new CurrentEntry(); 257 258 final int versionMadeBy = ZipShort.getValue(lfhBuf, off); 259 off += SHORT; 260 current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK); 261 262 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfhBuf, off); 263 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 264 final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 265 current.hasDataDescriptor = gpFlag.usesDataDescriptor(); 266 current.entry.setGeneralPurposeBit(gpFlag); 267 268 off += SHORT; 269 270 current.entry.setMethod(ZipShort.getValue(lfhBuf, off)); 271 off += SHORT; 272 273 final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfhBuf, off)); 274 current.entry.setTime(time); 275 off += WORD; 276 277 ZipLong size = null, cSize = null; 278 if (!current.hasDataDescriptor) { 279 current.entry.setCrc(ZipLong.getValue(lfhBuf, off)); 280 off += WORD; 281 282 cSize = new ZipLong(lfhBuf, off); 283 off += WORD; 284 285 size = new ZipLong(lfhBuf, off); 286 off += WORD; 287 } else { 288 off += 3 * WORD; 289 } 290 291 final int fileNameLen = ZipShort.getValue(lfhBuf, off); 292 293 off += SHORT; 294 295 final int extraLen = ZipShort.getValue(lfhBuf, off); 296 off += SHORT; // NOSONAR - assignment as documentation 297 298 final byte[] fileName = new byte[fileNameLen]; 299 readFully(fileName); 300 current.entry.setName(entryEncoding.decode(fileName), fileName); 301 if (hasUTF8Flag) { 302 current.entry.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG); 303 } 304 305 final byte[] extraData = new byte[extraLen]; 306 readFully(extraData); 307 current.entry.setExtra(extraData); 308 309 if (!hasUTF8Flag && useUnicodeExtraFields) { 310 ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null); 311 } 312 313 processZip64Extra(size, cSize); 314 315 current.entry.setLocalHeaderOffset(currentHeaderOffset); 316 current.entry.setDataOffset(getBytesRead()); 317 current.entry.setStreamContiguous(true); 318 319 ZipMethod m = ZipMethod.getMethodByCode(current.entry.getMethod()); 320 if (current.entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN) { 321 if (ZipUtil.canHandleEntryData(current.entry) && m != ZipMethod.STORED && m != ZipMethod.DEFLATED) { 322 InputStream bis = new BoundedInputStream(in, current.entry.getCompressedSize()); 323 switch (m) { 324 case UNSHRINKING: 325 current.in = new UnshrinkingInputStream(bis); 326 break; 327 case IMPLODING: 328 current.in = new ExplodingInputStream( 329 current.entry.getGeneralPurposeBit().getSlidingDictionarySize(), 330 current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), 331 bis); 332 break; 333 case BZIP2: 334 current.in = new BZip2CompressorInputStream(bis); 335 break; 336 case ENHANCED_DEFLATED: 337 current.in = new Deflate64CompressorInputStream(bis); 338 break; 339 default: 340 // we should never get here as all supported methods have been covered 341 // will cause an error when read is invoked, don't throw an exception here so people can 342 // skip unsupported entries 343 break; 344 } 345 } 346 } else if (m == ZipMethod.ENHANCED_DEFLATED) { 347 current.in = new Deflate64CompressorInputStream(in); 348 } 349 350 entriesRead++; 351 return current.entry; 352 } 353 354 /** 355 * Fills the given array with the first local file header and 356 * deals with splitting/spanning markers that may prefix the first 357 * LFH. 358 */ 359 private void readFirstLocalFileHeader(final byte[] lfh) throws IOException { 360 readFully(lfh); 361 final ZipLong sig = new ZipLong(lfh); 362 if (sig.equals(ZipLong.DD_SIG)) { 363 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING); 364 } 365 366 if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) { 367 // The archive is not really split as only one segment was 368 // needed in the end. Just skip over the marker. 369 final byte[] missedLfhBytes = new byte[4]; 370 readFully(missedLfhBytes); 371 System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4); 372 System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4); 373 } 374 } 375 376 /** 377 * Records whether a Zip64 extra is present and sets the size 378 * information from it if sizes are 0xFFFFFFFF and the entry 379 * doesn't use a data descriptor. 380 */ 381 private void processZip64Extra(final ZipLong size, final ZipLong cSize) { 382 final Zip64ExtendedInformationExtraField z64 = 383 (Zip64ExtendedInformationExtraField) 384 current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 385 current.usesZip64 = z64 != null; 386 if (!current.hasDataDescriptor) { 387 if (z64 != null // same as current.usesZip64 but avoids NPE warning 388 && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) { 389 current.entry.setCompressedSize(z64.getCompressedSize().getLongValue()); 390 current.entry.setSize(z64.getSize().getLongValue()); 391 } else { 392 current.entry.setCompressedSize(cSize.getValue()); 393 current.entry.setSize(size.getValue()); 394 } 395 } 396 } 397 398 @Override 399 public ArchiveEntry getNextEntry() throws IOException { 400 return getNextZipEntry(); 401 } 402 403 /** 404 * Whether this class is able to read the given entry. 405 * 406 * <p>May return false if it is set up to use encryption or a 407 * compression method that hasn't been implemented yet.</p> 408 * @since 1.1 409 */ 410 @Override 411 public boolean canReadEntryData(final ArchiveEntry ae) { 412 if (ae instanceof ZipArchiveEntry) { 413 final ZipArchiveEntry ze = (ZipArchiveEntry) ae; 414 return ZipUtil.canHandleEntryData(ze) 415 && supportsDataDescriptorFor(ze) 416 && supportsCompressedSizeFor(ze); 417 } 418 return false; 419 } 420 421 @Override 422 public int read(final byte[] buffer, final int offset, final int length) throws IOException { 423 if (closed) { 424 throw new IOException("The stream is closed"); 425 } 426 427 if (current == null) { 428 return -1; 429 } 430 431 // avoid int overflow, check null buffer 432 if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) { 433 throw new ArrayIndexOutOfBoundsException(); 434 } 435 436 ZipUtil.checkRequestedFeatures(current.entry); 437 if (!supportsDataDescriptorFor(current.entry)) { 438 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR, 439 current.entry); 440 } 441 if (!supportsCompressedSizeFor(current.entry)) { 442 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.UNKNOWN_COMPRESSED_SIZE, 443 current.entry); 444 } 445 446 int read; 447 if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) { 448 read = readStored(buffer, offset, length); 449 } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) { 450 read = readDeflated(buffer, offset, length); 451 } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode() 452 || current.entry.getMethod() == ZipMethod.IMPLODING.getCode() 453 || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() 454 || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) { 455 read = current.in.read(buffer, offset, length); 456 } else { 457 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()), 458 current.entry); 459 } 460 461 if (read >= 0) { 462 current.crc.update(buffer, offset, read); 463 } 464 465 return read; 466 } 467 468 /** 469 * Implementation of read for STORED entries. 470 */ 471 private int readStored(final byte[] buffer, final int offset, final int length) throws IOException { 472 473 if (current.hasDataDescriptor) { 474 if (lastStoredEntry == null) { 475 readStoredEntry(); 476 } 477 return lastStoredEntry.read(buffer, offset, length); 478 } 479 480 final long csize = current.entry.getSize(); 481 if (current.bytesRead >= csize) { 482 return -1; 483 } 484 485 if (buf.position() >= buf.limit()) { 486 buf.position(0); 487 final int l = in.read(buf.array()); 488 if (l == -1) { 489 buf.limit(0); 490 throw new IOException("Truncated ZIP file"); 491 } 492 buf.limit(l); 493 494 count(l); 495 current.bytesReadFromStream += l; 496 } 497 498 int toRead = Math.min(buf.remaining(), length); 499 if ((csize - current.bytesRead) < toRead) { 500 // if it is smaller than toRead then it fits into an int 501 toRead = (int) (csize - current.bytesRead); 502 } 503 buf.get(buffer, offset, toRead); 504 current.bytesRead += toRead; 505 return toRead; 506 } 507 508 /** 509 * Implementation of read for DEFLATED entries. 510 */ 511 private int readDeflated(final byte[] buffer, final int offset, final int length) throws IOException { 512 final int read = readFromInflater(buffer, offset, length); 513 if (read <= 0) { 514 if (inf.finished()) { 515 return -1; 516 } else if (inf.needsDictionary()) { 517 throw new ZipException("This archive needs a preset dictionary" 518 + " which is not supported by Commons" 519 + " Compress."); 520 } else if (read == -1) { 521 throw new IOException("Truncated ZIP file"); 522 } 523 } 524 return read; 525 } 526 527 /** 528 * Potentially reads more bytes to fill the inflater's buffer and 529 * reads from it. 530 */ 531 private int readFromInflater(final byte[] buffer, final int offset, final int length) throws IOException { 532 int read = 0; 533 do { 534 if (inf.needsInput()) { 535 final int l = fill(); 536 if (l > 0) { 537 current.bytesReadFromStream += buf.limit(); 538 } else if (l == -1) { 539 return -1; 540 } else { 541 break; 542 } 543 } 544 try { 545 read = inf.inflate(buffer, offset, length); 546 } catch (final DataFormatException e) { 547 throw (IOException) new ZipException(e.getMessage()).initCause(e); 548 } 549 } while (read == 0 && inf.needsInput()); 550 return read; 551 } 552 553 @Override 554 public void close() throws IOException { 555 if (!closed) { 556 closed = true; 557 try { 558 in.close(); 559 } finally { 560 inf.end(); 561 } 562 } 563 } 564 565 /** 566 * Skips over and discards value bytes of data from this input 567 * stream. 568 * 569 * <p>This implementation may end up skipping over some smaller 570 * number of bytes, possibly 0, if and only if it reaches the end 571 * of the underlying stream.</p> 572 * 573 * <p>The actual number of bytes skipped is returned.</p> 574 * 575 * @param value the number of bytes to be skipped. 576 * @return the actual number of bytes skipped. 577 * @throws IOException - if an I/O error occurs. 578 * @throws IllegalArgumentException - if value is negative. 579 */ 580 @Override 581 public long skip(final long value) throws IOException { 582 if (value >= 0) { 583 long skipped = 0; 584 while (skipped < value) { 585 final long rem = value - skipped; 586 final int x = read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length)); 587 if (x == -1) { 588 return skipped; 589 } 590 skipped += x; 591 } 592 return skipped; 593 } 594 throw new IllegalArgumentException(); 595 } 596 597 /** 598 * Checks if the signature matches what is expected for a zip file. 599 * Does not currently handle self-extracting zips which may have arbitrary 600 * leading content. 601 * 602 * @param signature the bytes to check 603 * @param length the number of bytes to check 604 * @return true, if this stream is a zip archive stream, false otherwise 605 */ 606 public static boolean matches(final byte[] signature, final int length) { 607 if (length < ZipArchiveOutputStream.LFH_SIG.length) { 608 return false; 609 } 610 611 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file 612 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip 613 || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip 614 || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes()); 615 } 616 617 private static boolean checksig(final byte[] signature, final byte[] expected) { 618 for (int i = 0; i < expected.length; i++) { 619 if (signature[i] != expected[i]) { 620 return false; 621 } 622 } 623 return true; 624 } 625 626 /** 627 * Closes the current ZIP archive entry and positions the underlying 628 * stream to the beginning of the next entry. All per-entry variables 629 * and data structures are cleared. 630 * <p> 631 * If the compressed size of this entry is included in the entry header, 632 * then any outstanding bytes are simply skipped from the underlying 633 * stream without uncompressing them. This allows an entry to be safely 634 * closed even if the compression method is unsupported. 635 * <p> 636 * In case we don't know the compressed size of this entry or have 637 * already buffered too much data from the underlying stream to support 638 * uncompression, then the uncompression process is completed and the 639 * end position of the stream is adjusted based on the result of that 640 * process. 641 * 642 * @throws IOException if an error occurs 643 */ 644 private void closeEntry() throws IOException { 645 if (closed) { 646 throw new IOException("The stream is closed"); 647 } 648 if (current == null) { 649 return; 650 } 651 652 // Ensure all entry bytes are read 653 if (currentEntryHasOutstandingBytes()) { 654 drainCurrentEntryData(); 655 } else { 656 // this is guaranteed to exhaust the stream 657 skip(Long.MAX_VALUE); //NOSONAR 658 659 final long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED 660 ? getBytesInflated() : current.bytesRead; 661 662 // this is at most a single read() operation and can't 663 // exceed the range of int 664 final int diff = (int) (current.bytesReadFromStream - inB); 665 666 // Pushback any required bytes 667 if (diff > 0) { 668 pushback(buf.array(), buf.limit() - diff, diff); 669 current.bytesReadFromStream -= diff; 670 } 671 672 // Drain remainder of entry if not all data bytes were required 673 if (currentEntryHasOutstandingBytes()) { 674 drainCurrentEntryData(); 675 } 676 } 677 678 if (lastStoredEntry == null && current.hasDataDescriptor) { 679 readDataDescriptor(); 680 } 681 682 inf.reset(); 683 buf.clear().flip(); 684 current = null; 685 lastStoredEntry = null; 686 } 687 688 /** 689 * If the compressed size of the current entry is included in the entry header 690 * and there are any outstanding bytes in the underlying stream, then 691 * this returns true. 692 * 693 * @return true, if current entry is determined to have outstanding bytes, false otherwise 694 */ 695 private boolean currentEntryHasOutstandingBytes() { 696 return current.bytesReadFromStream <= current.entry.getCompressedSize() 697 && !current.hasDataDescriptor; 698 } 699 700 /** 701 * Read all data of the current entry from the underlying stream 702 * that hasn't been read, yet. 703 */ 704 private void drainCurrentEntryData() throws IOException { 705 long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream; 706 while (remaining > 0) { 707 final long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining)); 708 if (n < 0) { 709 throw new EOFException("Truncated ZIP entry: " 710 + ArchiveUtils.sanitize(current.entry.getName())); 711 } 712 count(n); 713 remaining -= n; 714 } 715 } 716 717 /** 718 * Get the number of bytes Inflater has actually processed. 719 * 720 * <p>for Java < Java7 the getBytes* methods in 721 * Inflater/Deflater seem to return unsigned ints rather than 722 * longs that start over with 0 at 2^32.</p> 723 * 724 * <p>The stream knows how many bytes it has read, but not how 725 * many the Inflater actually consumed - it should be between the 726 * total number of bytes read for the entry and the total number 727 * minus the last read operation. Here we just try to make the 728 * value close enough to the bytes we've read by assuming the 729 * number of bytes consumed must be smaller than (or equal to) the 730 * number of bytes read but not smaller by more than 2^32.</p> 731 */ 732 private long getBytesInflated() { 733 long inB = inf.getBytesRead(); 734 if (current.bytesReadFromStream >= TWO_EXP_32) { 735 while (inB + TWO_EXP_32 <= current.bytesReadFromStream) { 736 inB += TWO_EXP_32; 737 } 738 } 739 return inB; 740 } 741 742 private int fill() throws IOException { 743 if (closed) { 744 throw new IOException("The stream is closed"); 745 } 746 final int length = in.read(buf.array()); 747 if (length > 0) { 748 buf.limit(length); 749 count(buf.limit()); 750 inf.setInput(buf.array(), 0, buf.limit()); 751 } 752 return length; 753 } 754 755 private void readFully(final byte[] b) throws IOException { 756 final int count = IOUtils.readFully(in, b); 757 count(count); 758 if (count < b.length) { 759 throw new EOFException(); 760 } 761 } 762 763 private void readDataDescriptor() throws IOException { 764 readFully(wordBuf); 765 ZipLong val = new ZipLong(wordBuf); 766 if (ZipLong.DD_SIG.equals(val)) { 767 // data descriptor with signature, skip sig 768 readFully(wordBuf); 769 val = new ZipLong(wordBuf); 770 } 771 current.entry.setCrc(val.getValue()); 772 773 // if there is a ZIP64 extra field, sizes are eight bytes 774 // each, otherwise four bytes each. Unfortunately some 775 // implementations - namely Java7 - use eight bytes without 776 // using a ZIP64 extra field - 777 // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588 778 779 // just read 16 bytes and check whether bytes nine to twelve 780 // look like one of the signatures of what could follow a data 781 // descriptor (ignoring archive decryption headers for now). 782 // If so, push back eight bytes and assume sizes are four 783 // bytes, otherwise sizes are eight bytes each. 784 readFully(twoDwordBuf); 785 final ZipLong potentialSig = new ZipLong(twoDwordBuf, DWORD); 786 if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) { 787 pushback(twoDwordBuf, DWORD, DWORD); 788 current.entry.setCompressedSize(ZipLong.getValue(twoDwordBuf)); 789 current.entry.setSize(ZipLong.getValue(twoDwordBuf, WORD)); 790 } else { 791 current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(twoDwordBuf)); 792 current.entry.setSize(ZipEightByteInteger.getLongValue(twoDwordBuf, DWORD)); 793 } 794 } 795 796 /** 797 * Whether this entry requires a data descriptor this library can work with. 798 * 799 * @return true if allowStoredEntriesWithDataDescriptor is true, 800 * the entry doesn't require any data descriptor or the method is 801 * DEFLATED or ENHANCED_DEFLATED. 802 */ 803 private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) { 804 return !entry.getGeneralPurposeBit().usesDataDescriptor() 805 806 || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED) 807 || entry.getMethod() == ZipEntry.DEFLATED 808 || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode(); 809 } 810 811 /** 812 * Whether the compressed size for the entry is either known or 813 * not required by the compression method being used. 814 */ 815 private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) { 816 return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN 817 || entry.getMethod() == ZipEntry.DEFLATED 818 || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() 819 || (entry.getGeneralPurposeBit().usesDataDescriptor() 820 && allowStoredEntriesWithDataDescriptor 821 && entry.getMethod() == ZipEntry.STORED); 822 } 823 824 /** 825 * Caches a stored entry that uses the data descriptor. 826 * 827 * <ul> 828 * <li>Reads a stored entry until the signature of a local file 829 * header, central directory header or data descriptor has been 830 * found.</li> 831 * <li>Stores all entry data in lastStoredEntry.</p> 832 * <li>Rewinds the stream to position at the data 833 * descriptor.</li> 834 * <li>reads the data descriptor</li> 835 * </ul> 836 * 837 * <p>After calling this method the entry should know its size, 838 * the entry's data is cached and the stream is positioned at the 839 * next local file or central directory header.</p> 840 */ 841 private void readStoredEntry() throws IOException { 842 final ByteArrayOutputStream bos = new ByteArrayOutputStream(); 843 int off = 0; 844 boolean done = false; 845 846 // length of DD without signature 847 final int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD; 848 849 while (!done) { 850 final int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off); 851 if (r <= 0) { 852 // read the whole archive without ever finding a 853 // central directory 854 throw new IOException("Truncated ZIP file"); 855 } 856 if (r + off < 4) { 857 // buffer too small to check for a signature, loop 858 off += r; 859 continue; 860 } 861 862 done = bufferContainsSignature(bos, off, r, ddLen); 863 if (!done) { 864 off = cacheBytesRead(bos, off, r, ddLen); 865 } 866 } 867 868 final byte[] b = bos.toByteArray(); 869 lastStoredEntry = new ByteArrayInputStream(b); 870 } 871 872 private static final byte[] LFH = ZipLong.LFH_SIG.getBytes(); 873 private static final byte[] CFH = ZipLong.CFH_SIG.getBytes(); 874 private static final byte[] DD = ZipLong.DD_SIG.getBytes(); 875 876 /** 877 * Checks whether the current buffer contains the signature of a 878 * "data descriptor", "local file header" or 879 * "central directory entry". 880 * 881 * <p>If it contains such a signature, reads the data descriptor 882 * and positions the stream right after the data descriptor.</p> 883 */ 884 private boolean bufferContainsSignature(final ByteArrayOutputStream bos, final int offset, final int lastRead, final int expectedDDLen) 885 throws IOException { 886 887 boolean done = false; 888 int readTooMuch = 0; 889 for (int i = 0; !done && i < lastRead - 4; i++) { 890 if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) { 891 if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3]) 892 || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) { 893 // found a LFH or CFH: 894 readTooMuch = offset + lastRead - i - expectedDDLen; 895 done = true; 896 } 897 else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) { 898 // found DD: 899 readTooMuch = offset + lastRead - i; 900 done = true; 901 } 902 if (done) { 903 // * push back bytes read in excess as well as the data 904 // descriptor 905 // * copy the remaining bytes to cache 906 // * read data descriptor 907 pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch); 908 bos.write(buf.array(), 0, i); 909 readDataDescriptor(); 910 } 911 } 912 } 913 return done; 914 } 915 916 /** 917 * If the last read bytes could hold a data descriptor and an 918 * incomplete signature then save the last bytes to the front of 919 * the buffer and cache everything in front of the potential data 920 * descriptor into the given ByteArrayOutputStream. 921 * 922 * <p>Data descriptor plus incomplete signature (3 bytes in the 923 * worst case) can be 20 bytes max.</p> 924 */ 925 private int cacheBytesRead(final ByteArrayOutputStream bos, int offset, final int lastRead, final int expecteDDLen) { 926 final int cacheable = offset + lastRead - expecteDDLen - 3; 927 if (cacheable > 0) { 928 bos.write(buf.array(), 0, cacheable); 929 System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3); 930 offset = expecteDDLen + 3; 931 } else { 932 offset += lastRead; 933 } 934 return offset; 935 } 936 937 private void pushback(final byte[] buf, final int offset, final int length) throws IOException { 938 ((PushbackInputStream) in).unread(buf, offset, length); 939 pushedBackBytes(length); 940 } 941 942 // End of Central Directory Record 943 // end of central dir signature WORD 944 // number of this disk SHORT 945 // number of the disk with the 946 // start of the central directory SHORT 947 // total number of entries in the 948 // central directory on this disk SHORT 949 // total number of entries in 950 // the central directory SHORT 951 // size of the central directory WORD 952 // offset of start of central 953 // directory with respect to 954 // the starting disk number WORD 955 // .ZIP file comment length SHORT 956 // .ZIP file comment up to 64KB 957 // 958 959 /** 960 * Reads the stream until it find the "End of central directory 961 * record" and consumes it as well. 962 */ 963 private void skipRemainderOfArchive() throws IOException { 964 // skip over central directory. One LFH has been read too much 965 // already. The calculation discounts file names and extra 966 // data so it will be too short. 967 realSkip((long) entriesRead * CFH_LEN - LFH_LEN); 968 findEocdRecord(); 969 realSkip((long) ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */); 970 readFully(shortBuf); 971 // file comment 972 realSkip(ZipShort.getValue(shortBuf)); 973 } 974 975 /** 976 * Reads forward until the signature of the "End of central 977 * directory" record is found. 978 */ 979 private void findEocdRecord() throws IOException { 980 int currentByte = -1; 981 boolean skipReadCall = false; 982 while (skipReadCall || (currentByte = readOneByte()) > -1) { 983 skipReadCall = false; 984 if (!isFirstByteOfEocdSig(currentByte)) { 985 continue; 986 } 987 currentByte = readOneByte(); 988 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) { 989 if (currentByte == -1) { 990 break; 991 } 992 skipReadCall = isFirstByteOfEocdSig(currentByte); 993 continue; 994 } 995 currentByte = readOneByte(); 996 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) { 997 if (currentByte == -1) { 998 break; 999 } 1000 skipReadCall = isFirstByteOfEocdSig(currentByte); 1001 continue; 1002 } 1003 currentByte = readOneByte(); 1004 if (currentByte == -1 1005 || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) { 1006 break; 1007 } 1008 skipReadCall = isFirstByteOfEocdSig(currentByte); 1009 } 1010 } 1011 1012 /** 1013 * Skips bytes by reading from the underlying stream rather than 1014 * the (potentially inflating) archive stream - which {@link 1015 * #skip} would do. 1016 * 1017 * Also updates bytes-read counter. 1018 */ 1019 private void realSkip(final long value) throws IOException { 1020 if (value >= 0) { 1021 long skipped = 0; 1022 while (skipped < value) { 1023 final long rem = value - skipped; 1024 final int x = in.read(skipBuf, 0, (int) (skipBuf.length > rem ? rem : skipBuf.length)); 1025 if (x == -1) { 1026 return; 1027 } 1028 count(x); 1029 skipped += x; 1030 } 1031 return; 1032 } 1033 throw new IllegalArgumentException(); 1034 } 1035 1036 /** 1037 * Reads bytes by reading from the underlying stream rather than 1038 * the (potentially inflating) archive stream - which {@link #read} would do. 1039 * 1040 * Also updates bytes-read counter. 1041 */ 1042 private int readOneByte() throws IOException { 1043 final int b = in.read(); 1044 if (b != -1) { 1045 count(1); 1046 } 1047 return b; 1048 } 1049 1050 private boolean isFirstByteOfEocdSig(final int b) { 1051 return b == ZipArchiveOutputStream.EOCD_SIG[0]; 1052 } 1053 1054 /** 1055 * Structure collecting information for the entry that is 1056 * currently being read. 1057 */ 1058 private static final class CurrentEntry { 1059 1060 /** 1061 * Current ZIP entry. 1062 */ 1063 private final ZipArchiveEntry entry = new ZipArchiveEntry(); 1064 1065 /** 1066 * Does the entry use a data descriptor? 1067 */ 1068 private boolean hasDataDescriptor; 1069 1070 /** 1071 * Does the entry have a ZIP64 extended information extra field. 1072 */ 1073 private boolean usesZip64; 1074 1075 /** 1076 * Number of bytes of entry content read by the client if the 1077 * entry is STORED. 1078 */ 1079 private long bytesRead; 1080 1081 /** 1082 * Number of bytes of entry content read so from the stream. 1083 * 1084 * <p>This may be more than the actual entry's length as some 1085 * stuff gets buffered up and needs to be pushed back when the 1086 * end of the entry has been reached.</p> 1087 */ 1088 private long bytesReadFromStream; 1089 1090 /** 1091 * The checksum calculated as the current entry is read. 1092 */ 1093 private final CRC32 crc = new CRC32(); 1094 1095 /** 1096 * The input stream decompressing the data for shrunk and imploded entries. 1097 */ 1098 private InputStream in; 1099 } 1100 1101 /** 1102 * Bounded input stream adapted from commons-io 1103 */ 1104 private class BoundedInputStream extends InputStream { 1105 1106 /** the wrapped input stream */ 1107 private final InputStream in; 1108 1109 /** the max length to provide */ 1110 private final long max; 1111 1112 /** the number of bytes already returned */ 1113 private long pos = 0; 1114 1115 /** 1116 * Creates a new <code>BoundedInputStream</code> that wraps the given input 1117 * stream and limits it to a certain size. 1118 * 1119 * @param in The wrapped input stream 1120 * @param size The maximum number of bytes to return 1121 */ 1122 public BoundedInputStream(final InputStream in, final long size) { 1123 this.max = size; 1124 this.in = in; 1125 } 1126 1127 @Override 1128 public int read() throws IOException { 1129 if (max >= 0 && pos >= max) { 1130 return -1; 1131 } 1132 final int result = in.read(); 1133 pos++; 1134 count(1); 1135 current.bytesReadFromStream++; 1136 return result; 1137 } 1138 1139 @Override 1140 public int read(final byte[] b) throws IOException { 1141 return this.read(b, 0, b.length); 1142 } 1143 1144 @Override 1145 public int read(final byte[] b, final int off, final int len) throws IOException { 1146 if (max >= 0 && pos >= max) { 1147 return -1; 1148 } 1149 final long maxRead = max >= 0 ? Math.min(len, max - pos) : len; 1150 final int bytesRead = in.read(b, off, (int) maxRead); 1151 1152 if (bytesRead == -1) { 1153 return -1; 1154 } 1155 1156 pos += bytesRead; 1157 count(bytesRead); 1158 current.bytesReadFromStream += bytesRead; 1159 return bytesRead; 1160 } 1161 1162 @Override 1163 public long skip(final long n) throws IOException { 1164 final long toSkip = max >= 0 ? Math.min(n, max - pos) : n; 1165 final long skippedBytes = in.skip(toSkip); 1166 pos += skippedBytes; 1167 return skippedBytes; 1168 } 1169 1170 @Override 1171 public int available() throws IOException { 1172 if (max >= 0 && pos >= max) { 1173 return 0; 1174 } 1175 return in.available(); 1176 } 1177 } 1178}