001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.snappy; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.io.PushbackInputStream; 024import java.util.Arrays; 025 026import org.apache.commons.compress.compressors.CompressorInputStream; 027import org.apache.commons.compress.utils.BoundedInputStream; 028import org.apache.commons.compress.utils.IOUtils; 029 030/** 031 * CompressorInputStream for the framing Snappy format. 032 * 033 * <p>Based on the "spec" in the version "Last revised: 2013-10-25"</p> 034 * 035 * @see <a href="http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt">Snappy framing format description</a> 036 * @since 1.7 037 */ 038public class FramedSnappyCompressorInputStream extends CompressorInputStream { 039 /** 040 * package private for tests only. 041 */ 042 static final long MASK_OFFSET = 0xa282ead8L; 043 044 private static final int STREAM_IDENTIFIER_TYPE = 0xff; 045 private static final int COMPRESSED_CHUNK_TYPE = 0; 046 private static final int UNCOMPRESSED_CHUNK_TYPE = 1; 047 private static final int PADDING_CHUNK_TYPE = 0xfe; 048 private static final int MIN_UNSKIPPABLE_TYPE = 2; 049 private static final int MAX_UNSKIPPABLE_TYPE = 0x7f; 050 private static final int MAX_SKIPPABLE_TYPE = 0xfd; 051 052 private static final byte[] SZ_SIGNATURE = new byte[] { 053 (byte) STREAM_IDENTIFIER_TYPE, // tag 054 6, 0, 0, // length 055 's', 'N', 'a', 'P', 'p', 'Y' 056 }; 057 058 /** The underlying stream to read compressed data from */ 059 private final PushbackInputStream in; 060 061 private SnappyCompressorInputStream currentCompressedChunk; 062 063 // used in no-arg read method 064 private final byte[] oneByte = new byte[1]; 065 066 private boolean endReached, inUncompressedChunk; 067 068 private int uncompressedBytesRemaining; 069 private long expectedChecksum = -1; 070 private final PureJavaCrc32C checksum = new PureJavaCrc32C(); 071 072 /** 073 * Constructs a new input stream that decompresses snappy-framed-compressed data 074 * from the specified input stream. 075 * @param in the InputStream from which to read the compressed data 076 * @throws IOException if reading fails 077 */ 078 public FramedSnappyCompressorInputStream(InputStream in) throws IOException { 079 this.in = new PushbackInputStream(in, 1); 080 readStreamIdentifier(); 081 } 082 083 /** {@inheritDoc} */ 084 @Override 085 public int read() throws IOException { 086 return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF; 087 } 088 089 /** {@inheritDoc} */ 090 @Override 091 public void close() throws IOException { 092 if (currentCompressedChunk != null) { 093 currentCompressedChunk.close(); 094 currentCompressedChunk = null; 095 } 096 in.close(); 097 } 098 099 /** {@inheritDoc} */ 100 @Override 101 public int read(byte[] b, int off, int len) throws IOException { 102 int read = readOnce(b, off, len); 103 if (read == -1) { 104 readNextBlock(); 105 if (endReached) { 106 return -1; 107 } 108 read = readOnce(b, off, len); 109 } 110 return read; 111 } 112 113 /** {@inheritDoc} */ 114 @Override 115 public int available() throws IOException { 116 if (inUncompressedChunk) { 117 return Math.min(uncompressedBytesRemaining, 118 in.available()); 119 } else if (currentCompressedChunk != null) { 120 return currentCompressedChunk.available(); 121 } 122 return 0; 123 } 124 125 /** 126 * Read from the current chunk into the given array. 127 * 128 * @return -1 if there is no current chunk or the number of bytes 129 * read from the current chunk (which may be -1 if the end of the 130 * chunk is reached). 131 */ 132 private int readOnce(byte[] b, int off, int len) throws IOException { 133 int read = -1; 134 if (inUncompressedChunk) { 135 int amount = Math.min(uncompressedBytesRemaining, len); 136 if (amount == 0) { 137 return -1; 138 } 139 read = in.read(b, off, amount); 140 if (read != -1) { 141 uncompressedBytesRemaining -= read; 142 count(read); 143 } 144 } else if (currentCompressedChunk != null) { 145 long before = currentCompressedChunk.getBytesRead(); 146 read = currentCompressedChunk.read(b, off, len); 147 if (read == -1) { 148 currentCompressedChunk.close(); 149 currentCompressedChunk = null; 150 } else { 151 count(currentCompressedChunk.getBytesRead() - before); 152 } 153 } 154 if (read > 0) { 155 checksum.update(b, off, read); 156 } 157 return read; 158 } 159 160 private void readNextBlock() throws IOException { 161 verifyLastChecksumAndReset(); 162 inUncompressedChunk = false; 163 int type = readOneByte(); 164 if (type == -1) { 165 endReached = true; 166 } else if (type == STREAM_IDENTIFIER_TYPE) { 167 in.unread(type); 168 pushedBackBytes(1); 169 readStreamIdentifier(); 170 readNextBlock(); 171 } else if (type == PADDING_CHUNK_TYPE 172 || (type > MAX_UNSKIPPABLE_TYPE && type <= MAX_SKIPPABLE_TYPE)) { 173 skipBlock(); 174 readNextBlock(); 175 } else if (type >= MIN_UNSKIPPABLE_TYPE && type <= MAX_UNSKIPPABLE_TYPE) { 176 throw new IOException("unskippable chunk with type " + type 177 + " (hex " + Integer.toHexString(type) + ")" 178 + " detected."); 179 } else if (type == UNCOMPRESSED_CHUNK_TYPE) { 180 inUncompressedChunk = true; 181 uncompressedBytesRemaining = readSize() - 4 /* CRC */; 182 expectedChecksum = unmask(readCrc()); 183 } else if (type == COMPRESSED_CHUNK_TYPE) { 184 long size = readSize() - 4 /* CRC */; 185 expectedChecksum = unmask(readCrc()); 186 currentCompressedChunk = 187 new SnappyCompressorInputStream(new BoundedInputStream(in, size)); 188 // constructor reads uncompressed size 189 count(currentCompressedChunk.getBytesRead()); 190 } else { 191 // impossible as all potential byte values have been covered 192 throw new IOException("unknown chunk type " + type 193 + " detected."); 194 } 195 } 196 197 private long readCrc() throws IOException { 198 byte[] b = new byte[4]; 199 int read = IOUtils.readFully(in, b); 200 count(read); 201 if (read != 4) { 202 throw new IOException("premature end of stream"); 203 } 204 long crc = 0; 205 for (int i = 0; i < 4; i++) { 206 crc |= (b[i] & 0xFFL) << (8 * i); 207 } 208 return crc; 209 } 210 211 static long unmask(long x) { 212 // ugly, maybe we should just have used ints and deal with the 213 // overflow 214 x -= MASK_OFFSET; 215 x &= 0xffffFFFFL; 216 return ((x >> 17) | (x << 15)) & 0xffffFFFFL; 217 } 218 219 private int readSize() throws IOException { 220 int b = 0; 221 int sz = 0; 222 for (int i = 0; i < 3; i++) { 223 b = readOneByte(); 224 if (b == -1) { 225 throw new IOException("premature end of stream"); 226 } 227 sz |= (b << (i * 8)); 228 } 229 return sz; 230 } 231 232 private void skipBlock() throws IOException { 233 int size = readSize(); 234 long read = IOUtils.skip(in, size); 235 count(read); 236 if (read != size) { 237 throw new IOException("premature end of stream"); 238 } 239 } 240 241 private void readStreamIdentifier() throws IOException { 242 byte[] b = new byte[10]; 243 int read = IOUtils.readFully(in, b); 244 count(read); 245 if (10 != read || !matches(b, 10)) { 246 throw new IOException("Not a framed Snappy stream"); 247 } 248 } 249 250 private int readOneByte() throws IOException { 251 int b = in.read(); 252 if (b != -1) { 253 count(1); 254 return b & 0xFF; 255 } 256 return -1; 257 } 258 259 private void verifyLastChecksumAndReset() throws IOException { 260 if (expectedChecksum >= 0 && expectedChecksum != checksum.getValue()) { 261 throw new IOException("Checksum verification failed"); 262 } 263 expectedChecksum = -1; 264 checksum.reset(); 265 } 266 267 /** 268 * Checks if the signature matches what is expected for a .sz file. 269 * 270 * <p>.sz files start with a chunk with tag 0xff and content sNaPpY.</p> 271 * 272 * @param signature the bytes to check 273 * @param length the number of bytes to check 274 * @return true if this is a .sz stream, false otherwise 275 */ 276 public static boolean matches(byte[] signature, int length) { 277 278 if (length < SZ_SIGNATURE.length) { 279 return false; 280 } 281 282 byte[] shortenedSig = signature; 283 if (signature.length > SZ_SIGNATURE.length) { 284 shortenedSig = new byte[SZ_SIGNATURE.length]; 285 System.arraycopy(signature, 0, shortenedSig, 0, SZ_SIGNATURE.length); 286 } 287 288 return Arrays.equals(shortenedSig, SZ_SIGNATURE); 289 } 290 291}