001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019
020/*
021 * This package is based on the work done by Keiron Liddle, Aftex Software
022 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
023 * great code.
024 */
025package org.apache.commons.compress.compressors.bzip2;
026
027import java.io.IOException;
028import java.io.InputStream;
029
030import org.apache.commons.compress.compressors.CompressorInputStream;
031
032/**
033 * An input stream that decompresses from the BZip2 format to be read as any other stream.
034 * 
035 * @NotThreadSafe
036 */
037public class BZip2CompressorInputStream extends CompressorInputStream implements
038                                                                          BZip2Constants {
039
040    /**
041     * Index of the last char in the block, so the block size == last + 1.
042     */
043    private int last;
044
045    /**
046     * Index in zptr[] of original string after sorting.
047     */
048    private int origPtr;
049
050    /**
051     * always: in the range 0 .. 9. The current block size is 100000 * this
052     * number.
053     */
054    private int blockSize100k;
055
056    private boolean blockRandomised;
057
058    private int bsBuff;
059    private int bsLive;
060    private final CRC crc = new CRC();
061
062    private int nInUse;
063
064    private InputStream in;
065    private final boolean decompressConcatenated;
066
067    private static final int EOF = 0;
068    private static final int START_BLOCK_STATE = 1;
069    private static final int RAND_PART_A_STATE = 2;
070    private static final int RAND_PART_B_STATE = 3;
071    private static final int RAND_PART_C_STATE = 4;
072    private static final int NO_RAND_PART_A_STATE = 5;
073    private static final int NO_RAND_PART_B_STATE = 6;
074    private static final int NO_RAND_PART_C_STATE = 7;
075
076    private int currentState = START_BLOCK_STATE;
077
078    private int storedBlockCRC, storedCombinedCRC;
079    private int computedBlockCRC, computedCombinedCRC;
080
081    // Variables used by setup* methods exclusively
082
083    private int su_count;
084    private int su_ch2;
085    private int su_chPrev;
086    private int su_i2;
087    private int su_j2;
088    private int su_rNToGo;
089    private int su_rTPos;
090    private int su_tPos;
091    private char su_z;
092
093    /**
094     * All memory intensive stuff. This field is initialized by initBlock().
095     */
096    private BZip2CompressorInputStream.Data data;
097
098    /**
099     * Constructs a new BZip2CompressorInputStream which decompresses bytes
100     * read from the specified stream. This doesn't suppprt decompressing
101     * concatenated .bz2 files.
102     * 
103     * @param in the InputStream from which this object should be created
104     * @throws IOException
105     *             if the stream content is malformed or an I/O error occurs.
106     * @throws NullPointerException
107     *             if {@code in == null}
108     */
109    public BZip2CompressorInputStream(final InputStream in) throws IOException {
110        this(in, false);
111    }
112
113    /**
114     * Constructs a new BZip2CompressorInputStream which decompresses bytes
115     * read from the specified stream.
116     *
117     * @param in the InputStream from which this object should be created
118     * @param decompressConcatenated
119     *                     if true, decompress until the end of the input;
120     *                     if false, stop after the first .bz2 stream and
121     *                     leave the input position to point to the next
122     *                     byte after the .bz2 stream
123     *
124     * @throws IOException
125     *             if the stream content is malformed or an I/O error occurs.
126     * @throws NullPointerException
127     *             if {@code in == null}
128     */
129    public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
130        this.in = in;
131        this.decompressConcatenated = decompressConcatenated;
132
133        init(true);
134        initBlock();
135    }
136
137    @Override
138    public int read() throws IOException {
139        if (this.in != null) {
140            int r = read0();
141            count(r < 0 ? -1 : 1);
142            return r;
143        } else {
144            throw new IOException("stream closed");
145        }
146    }
147
148    /*
149     * (non-Javadoc)
150     * 
151     * @see java.io.InputStream#read(byte[], int, int)
152     */
153    @Override
154    public int read(final byte[] dest, final int offs, final int len)
155        throws IOException {
156        if (offs < 0) {
157            throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
158        }
159        if (len < 0) {
160            throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
161        }
162        if (offs + len > dest.length) {
163            throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
164                                                + len + ") > dest.length(" + dest.length + ").");
165        }
166        if (this.in == null) {
167            throw new IOException("stream closed");
168        }
169        if (len == 0) {
170            return 0;
171        }
172
173        final int hi = offs + len;
174        int destOffs = offs;
175        int b;
176        while (destOffs < hi && ((b = read0()) >= 0)) {
177            dest[destOffs++] = (byte) b;
178            count(1);
179        }
180
181        int c = (destOffs == offs) ? -1 : (destOffs - offs);
182        return c;
183    }
184
185    private void makeMaps() {
186        final boolean[] inUse = this.data.inUse;
187        final byte[] seqToUnseq = this.data.seqToUnseq;
188
189        int nInUseShadow = 0;
190
191        for (int i = 0; i < 256; i++) {
192            if (inUse[i]) {
193                seqToUnseq[nInUseShadow++] = (byte) i;
194            }
195        }
196
197        this.nInUse = nInUseShadow;
198    }
199
200    private int read0() throws IOException {
201        switch (currentState) {
202        case EOF:
203            return -1;
204
205        case START_BLOCK_STATE:
206            return setupBlock();
207
208        case RAND_PART_A_STATE:
209            throw new IllegalStateException();
210
211        case RAND_PART_B_STATE:
212            return setupRandPartB();
213
214        case RAND_PART_C_STATE:
215            return setupRandPartC();
216
217        case NO_RAND_PART_A_STATE:
218            throw new IllegalStateException();
219
220        case NO_RAND_PART_B_STATE:
221            return setupNoRandPartB();
222
223        case NO_RAND_PART_C_STATE:
224            return setupNoRandPartC();
225
226        default:
227            throw new IllegalStateException();
228        }
229    }
230
231    private boolean init(boolean isFirstStream) throws IOException {
232        if (null == in) {
233            throw new IOException("No InputStream");
234        }
235
236        int magic0 = this.in.read();
237        if (magic0 == -1 && !isFirstStream) {
238            return false;
239        }
240        int magic1 = this.in.read();
241        int magic2 = this.in.read();
242
243        if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') {
244            throw new IOException(isFirstStream
245                    ? "Stream is not in the BZip2 format"
246                    : "Garbage after a valid BZip2 stream");
247        }
248
249        int blockSize = this.in.read();
250        if ((blockSize < '1') || (blockSize > '9')) {
251            throw new IOException("BZip2 block size is invalid");
252        }
253
254        this.blockSize100k = blockSize - '0';
255
256        this.bsLive = 0;
257        this.computedCombinedCRC = 0;
258
259        return true;
260    }
261
262    private void initBlock() throws IOException {
263        char magic0;
264        char magic1;
265        char magic2;
266        char magic3;
267        char magic4;
268        char magic5;
269
270        while (true) {
271            // Get the block magic bytes.
272            magic0 = bsGetUByte();
273            magic1 = bsGetUByte();
274            magic2 = bsGetUByte();
275            magic3 = bsGetUByte();
276            magic4 = bsGetUByte();
277            magic5 = bsGetUByte();
278
279            // If isn't end of stream magic, break out of the loop.
280            if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45
281                    || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) {
282                break;
283            }
284
285            // End of stream was reached. Check the combined CRC and
286            // advance to the next .bz2 stream if decoding concatenated
287            // streams.
288            if (complete()) {
289                return;
290            }
291        }
292
293        if (magic0 != 0x31 || // '1'
294            magic1 != 0x41 || // ')'
295            magic2 != 0x59 || // 'Y'
296            magic3 != 0x26 || // '&'
297            magic4 != 0x53 || // 'S'
298            magic5 != 0x59 // 'Y'
299            ) {
300            this.currentState = EOF;
301            throw new IOException("bad block header");
302        } else {
303            this.storedBlockCRC = bsGetInt();
304            this.blockRandomised = bsR(1) == 1;
305
306            /**
307             * Allocate data here instead in constructor, so we do not allocate
308             * it if the input file is empty.
309             */
310            if (this.data == null) {
311                this.data = new Data(this.blockSize100k);
312            }
313
314            // currBlockNo++;
315            getAndMoveToFrontDecode();
316
317            this.crc.initialiseCRC();
318            this.currentState = START_BLOCK_STATE;
319        }
320    }
321
322    private void endBlock() throws IOException {
323        this.computedBlockCRC = this.crc.getFinalCRC();
324
325        // A bad CRC is considered a fatal error.
326        if (this.storedBlockCRC != this.computedBlockCRC) {
327            // make next blocks readable without error
328            // (repair feature, not yet documented, not tested)
329            this.computedCombinedCRC = (this.storedCombinedCRC << 1)
330                | (this.storedCombinedCRC >>> 31);
331            this.computedCombinedCRC ^= this.storedBlockCRC;
332
333            throw new IOException("BZip2 CRC error");
334        }
335
336        this.computedCombinedCRC = (this.computedCombinedCRC << 1)
337            | (this.computedCombinedCRC >>> 31);
338        this.computedCombinedCRC ^= this.computedBlockCRC;
339    }
340
341    private boolean complete() throws IOException {
342        this.storedCombinedCRC = bsGetInt();
343        this.currentState = EOF;
344        this.data = null;
345
346        if (this.storedCombinedCRC != this.computedCombinedCRC) {
347            throw new IOException("BZip2 CRC error");
348        }
349
350        // Look for the next .bz2 stream if decompressing
351        // concatenated files.
352        return !decompressConcatenated || !init(false);
353    }
354
355    @Override
356    public void close() throws IOException {
357        InputStream inShadow = this.in;
358        if (inShadow != null) {
359            try {
360                if (inShadow != System.in) {
361                    inShadow.close();
362                }
363            } finally {
364                this.data = null;
365                this.in = null;
366            }
367        }
368    }
369
370    private int bsR(final int n) throws IOException {
371        int bsLiveShadow = this.bsLive;
372        int bsBuffShadow = this.bsBuff;
373
374        if (bsLiveShadow < n) {
375            final InputStream inShadow = this.in;
376            do {
377                int thech = inShadow.read();
378
379                if (thech < 0) {
380                    throw new IOException("unexpected end of stream");
381                }
382
383                bsBuffShadow = (bsBuffShadow << 8) | thech;
384                bsLiveShadow += 8;
385            } while (bsLiveShadow < n);
386
387            this.bsBuff = bsBuffShadow;
388        }
389
390        this.bsLive = bsLiveShadow - n;
391        return (bsBuffShadow >> (bsLiveShadow - n)) & ((1 << n) - 1);
392    }
393
394    private boolean bsGetBit() throws IOException {
395        return bsR(1) != 0;
396    }
397
398    private char bsGetUByte() throws IOException {
399        return (char) bsR(8);
400    }
401
402    private int bsGetInt() throws IOException {
403        return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8);
404    }
405
406    /**
407     * Called by createHuffmanDecodingTables() exclusively.
408     */
409    private static void hbCreateDecodeTables(final int[] limit,
410                                             final int[] base, final int[] perm, final char[] length,
411                                             final int minLen, final int maxLen, final int alphaSize) {
412        for (int i = minLen, pp = 0; i <= maxLen; i++) {
413            for (int j = 0; j < alphaSize; j++) {
414                if (length[j] == i) {
415                    perm[pp++] = j;
416                }
417            }
418        }
419
420        for (int i = MAX_CODE_LEN; --i > 0;) {
421            base[i] = 0;
422            limit[i] = 0;
423        }
424
425        for (int i = 0; i < alphaSize; i++) {
426            base[length[i] + 1]++;
427        }
428
429        for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
430            b += base[i];
431            base[i] = b;
432        }
433
434        for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
435            final int nb = base[i + 1];
436            vec += nb - b;
437            b = nb;
438            limit[i] = vec - 1;
439            vec <<= 1;
440        }
441
442        for (int i = minLen + 1; i <= maxLen; i++) {
443            base[i] = ((limit[i - 1] + 1) << 1) - base[i];
444        }
445    }
446
447    private void recvDecodingTables() throws IOException {
448        final Data dataShadow = this.data;
449        final boolean[] inUse = dataShadow.inUse;
450        final byte[] pos = dataShadow.recvDecodingTables_pos;
451        final byte[] selector = dataShadow.selector;
452        final byte[] selectorMtf = dataShadow.selectorMtf;
453
454        int inUse16 = 0;
455
456        /* Receive the mapping table */
457        for (int i = 0; i < 16; i++) {
458            if (bsGetBit()) {
459                inUse16 |= 1 << i;
460            }
461        }
462
463        for (int i = 256; --i >= 0;) {
464            inUse[i] = false;
465        }
466
467        for (int i = 0; i < 16; i++) {
468            if ((inUse16 & (1 << i)) != 0) {
469                final int i16 = i << 4;
470                for (int j = 0; j < 16; j++) {
471                    if (bsGetBit()) {
472                        inUse[i16 + j] = true;
473                    }
474                }
475            }
476        }
477
478        makeMaps();
479        final int alphaSize = this.nInUse + 2;
480
481        /* Now the selectors */
482        final int nGroups = bsR(3);
483        final int nSelectors = bsR(15);
484
485        for (int i = 0; i < nSelectors; i++) {
486            int j = 0;
487            while (bsGetBit()) {
488                j++;
489            }
490            selectorMtf[i] = (byte) j;
491        }
492
493        /* Undo the MTF values for the selectors. */
494        for (int v = nGroups; --v >= 0;) {
495            pos[v] = (byte) v;
496        }
497
498        for (int i = 0; i < nSelectors; i++) {
499            int v = selectorMtf[i] & 0xff;
500            final byte tmp = pos[v];
501            while (v > 0) {
502                // nearly all times v is zero, 4 in most other cases
503                pos[v] = pos[v - 1];
504                v--;
505            }
506            pos[0] = tmp;
507            selector[i] = tmp;
508        }
509
510        final char[][] len = dataShadow.temp_charArray2d;
511
512        /* Now the coding tables */
513        for (int t = 0; t < nGroups; t++) {
514            int curr = bsR(5);
515            final char[] len_t = len[t];
516            for (int i = 0; i < alphaSize; i++) {
517                while (bsGetBit()) {
518                    curr += bsGetBit() ? -1 : 1;
519                }
520                len_t[i] = (char) curr;
521            }
522        }
523
524        // finally create the Huffman tables
525        createHuffmanDecodingTables(alphaSize, nGroups);
526    }
527
528    /**
529     * Called by recvDecodingTables() exclusively.
530     */
531    private void createHuffmanDecodingTables(final int alphaSize,
532                                             final int nGroups) {
533        final Data dataShadow = this.data;
534        final char[][] len = dataShadow.temp_charArray2d;
535        final int[] minLens = dataShadow.minLens;
536        final int[][] limit = dataShadow.limit;
537        final int[][] base = dataShadow.base;
538        final int[][] perm = dataShadow.perm;
539
540        for (int t = 0; t < nGroups; t++) {
541            int minLen = 32;
542            int maxLen = 0;
543            final char[] len_t = len[t];
544            for (int i = alphaSize; --i >= 0;) {
545                final char lent = len_t[i];
546                if (lent > maxLen) {
547                    maxLen = lent;
548                }
549                if (lent < minLen) {
550                    minLen = lent;
551                }
552            }
553            hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
554                                 maxLen, alphaSize);
555            minLens[t] = minLen;
556        }
557    }
558
559    private void getAndMoveToFrontDecode() throws IOException {
560        this.origPtr = bsR(24);
561        recvDecodingTables();
562
563        final InputStream inShadow = this.in;
564        final Data dataShadow = this.data;
565        final byte[] ll8 = dataShadow.ll8;
566        final int[] unzftab = dataShadow.unzftab;
567        final byte[] selector = dataShadow.selector;
568        final byte[] seqToUnseq = dataShadow.seqToUnseq;
569        final char[] yy = dataShadow.getAndMoveToFrontDecode_yy;
570        final int[] minLens = dataShadow.minLens;
571        final int[][] limit = dataShadow.limit;
572        final int[][] base = dataShadow.base;
573        final int[][] perm = dataShadow.perm;
574        final int limitLast = this.blockSize100k * 100000;
575
576        /*
577         * Setting up the unzftab entries here is not strictly necessary, but it
578         * does save having to do it later in a separate pass, and so saves a
579         * block's worth of cache misses.
580         */
581        for (int i = 256; --i >= 0;) {
582            yy[i] = (char) i;
583            unzftab[i] = 0;
584        }
585
586        int groupNo = 0;
587        int groupPos = G_SIZE - 1;
588        final int eob = this.nInUse + 1;
589        int nextSym = getAndMoveToFrontDecode0(0);
590        int bsBuffShadow = this.bsBuff;
591        int bsLiveShadow = this.bsLive;
592        int lastShadow = -1;
593        int zt = selector[groupNo] & 0xff;
594        int[] base_zt = base[zt];
595        int[] limit_zt = limit[zt];
596        int[] perm_zt = perm[zt];
597        int minLens_zt = minLens[zt];
598
599        while (nextSym != eob) {
600            if ((nextSym == RUNA) || (nextSym == RUNB)) {
601                int s = -1;
602
603                for (int n = 1; true; n <<= 1) {
604                    if (nextSym == RUNA) {
605                        s += n;
606                    } else if (nextSym == RUNB) {
607                        s += n << 1;
608                    } else {
609                        break;
610                    }
611
612                    if (groupPos == 0) {
613                        groupPos = G_SIZE - 1;
614                        zt = selector[++groupNo] & 0xff;
615                        base_zt = base[zt];
616                        limit_zt = limit[zt];
617                        perm_zt = perm[zt];
618                        minLens_zt = minLens[zt];
619                    } else {
620                        groupPos--;
621                    }
622
623                    int zn = minLens_zt;
624
625                    // Inlined:
626                    // int zvec = bsR(zn);
627                    while (bsLiveShadow < zn) {
628                        final int thech = inShadow.read();
629                        if (thech >= 0) {
630                            bsBuffShadow = (bsBuffShadow << 8) | thech;
631                            bsLiveShadow += 8;
632                            continue;
633                        } else {
634                            throw new IOException("unexpected end of stream");
635                        }
636                    }
637                    int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
638                        & ((1 << zn) - 1);
639                    bsLiveShadow -= zn;
640
641                    while (zvec > limit_zt[zn]) {
642                        zn++;
643                        while (bsLiveShadow < 1) {
644                            final int thech = inShadow.read();
645                            if (thech >= 0) {
646                                bsBuffShadow = (bsBuffShadow << 8) | thech;
647                                bsLiveShadow += 8;
648                                continue;
649                            } else {
650                                throw new IOException(
651                                                      "unexpected end of stream");
652                            }
653                        }
654                        bsLiveShadow--;
655                        zvec = (zvec << 1)
656                            | ((bsBuffShadow >> bsLiveShadow) & 1);
657                    }
658                    nextSym = perm_zt[zvec - base_zt[zn]];
659                }
660
661                final byte ch = seqToUnseq[yy[0]];
662                unzftab[ch & 0xff] += s + 1;
663
664                while (s-- >= 0) {
665                    ll8[++lastShadow] = ch;
666                }
667
668                if (lastShadow >= limitLast) {
669                    throw new IOException("block overrun");
670                }
671            } else {
672                if (++lastShadow >= limitLast) {
673                    throw new IOException("block overrun");
674                }
675
676                final char tmp = yy[nextSym - 1];
677                unzftab[seqToUnseq[tmp] & 0xff]++;
678                ll8[lastShadow] = seqToUnseq[tmp];
679
680                /*
681                 * This loop is hammered during decompression, hence avoid
682                 * native method call overhead of System.arraycopy for very
683                 * small ranges to copy.
684                 */
685                if (nextSym <= 16) {
686                    for (int j = nextSym - 1; j > 0;) {
687                        yy[j] = yy[--j];
688                    }
689                } else {
690                    System.arraycopy(yy, 0, yy, 1, nextSym - 1);
691                }
692
693                yy[0] = tmp;
694
695                if (groupPos == 0) {
696                    groupPos = G_SIZE - 1;
697                    zt = selector[++groupNo] & 0xff;
698                    base_zt = base[zt];
699                    limit_zt = limit[zt];
700                    perm_zt = perm[zt];
701                    minLens_zt = minLens[zt];
702                } else {
703                    groupPos--;
704                }
705
706                int zn = minLens_zt;
707
708                // Inlined:
709                // int zvec = bsR(zn);
710                while (bsLiveShadow < zn) {
711                    final int thech = inShadow.read();
712                    if (thech >= 0) {
713                        bsBuffShadow = (bsBuffShadow << 8) | thech;
714                        bsLiveShadow += 8;
715                        continue;
716                    } else {
717                        throw new IOException("unexpected end of stream");
718                    }
719                }
720                int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
721                    & ((1 << zn) - 1);
722                bsLiveShadow -= zn;
723
724                while (zvec > limit_zt[zn]) {
725                    zn++;
726                    while (bsLiveShadow < 1) {
727                        final int thech = inShadow.read();
728                        if (thech >= 0) {
729                            bsBuffShadow = (bsBuffShadow << 8) | thech;
730                            bsLiveShadow += 8;
731                            continue;
732                        } else {
733                            throw new IOException("unexpected end of stream");
734                        }
735                    }
736                    bsLiveShadow--;
737                    zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
738                }
739                nextSym = perm_zt[zvec - base_zt[zn]];
740            }
741        }
742
743        this.last = lastShadow;
744        this.bsLive = bsLiveShadow;
745        this.bsBuff = bsBuffShadow;
746    }
747
748    private int getAndMoveToFrontDecode0(final int groupNo) throws IOException {
749        final InputStream inShadow = this.in;
750        final Data dataShadow = this.data;
751        final int zt = dataShadow.selector[groupNo] & 0xff;
752        final int[] limit_zt = dataShadow.limit[zt];
753        int zn = dataShadow.minLens[zt];
754        int zvec = bsR(zn);
755        int bsLiveShadow = this.bsLive;
756        int bsBuffShadow = this.bsBuff;
757
758        while (zvec > limit_zt[zn]) {
759            zn++;
760            while (bsLiveShadow < 1) {
761                final int thech = inShadow.read();
762
763                if (thech >= 0) {
764                    bsBuffShadow = (bsBuffShadow << 8) | thech;
765                    bsLiveShadow += 8;
766                    continue;
767                } else {
768                    throw new IOException("unexpected end of stream");
769                }
770            }
771            bsLiveShadow--;
772            zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
773        }
774
775        this.bsLive = bsLiveShadow;
776        this.bsBuff = bsBuffShadow;
777
778        return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]];
779    }
780
781    private int setupBlock() throws IOException {
782        if (currentState == EOF || this.data == null) {
783            return -1;
784        }
785
786        final int[] cftab = this.data.cftab;
787        final int[] tt = this.data.initTT(this.last + 1);
788        final byte[] ll8 = this.data.ll8;
789        cftab[0] = 0;
790        System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);
791
792        for (int i = 1, c = cftab[0]; i <= 256; i++) {
793            c += cftab[i];
794            cftab[i] = c;
795        }
796
797        for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
798            tt[cftab[ll8[i] & 0xff]++] = i;
799        }
800
801        if ((this.origPtr < 0) || (this.origPtr >= tt.length)) {
802            throw new IOException("stream corrupted");
803        }
804
805        this.su_tPos = tt[this.origPtr];
806        this.su_count = 0;
807        this.su_i2 = 0;
808        this.su_ch2 = 256; /* not a char and not EOF */
809
810        if (this.blockRandomised) {
811            this.su_rNToGo = 0;
812            this.su_rTPos = 0;
813            return setupRandPartA();
814        }
815        return setupNoRandPartA();
816    }
817
818    private int setupRandPartA() throws IOException {
819        if (this.su_i2 <= this.last) {
820            this.su_chPrev = this.su_ch2;
821            int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
822            this.su_tPos = this.data.tt[this.su_tPos];
823            if (this.su_rNToGo == 0) {
824                this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
825                if (++this.su_rTPos == 512) {
826                    this.su_rTPos = 0;
827                }
828            } else {
829                this.su_rNToGo--;
830            }
831            this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0;
832            this.su_i2++;
833            this.currentState = RAND_PART_B_STATE;
834            this.crc.updateCRC(su_ch2Shadow);
835            return su_ch2Shadow;
836        } else {
837            endBlock();
838            initBlock();
839            return setupBlock();
840        }
841    }
842
843    private int setupNoRandPartA() throws IOException {
844        if (this.su_i2 <= this.last) {
845            this.su_chPrev = this.su_ch2;
846            int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
847            this.su_ch2 = su_ch2Shadow;
848            this.su_tPos = this.data.tt[this.su_tPos];
849            this.su_i2++;
850            this.currentState = NO_RAND_PART_B_STATE;
851            this.crc.updateCRC(su_ch2Shadow);
852            return su_ch2Shadow;
853        } else {
854            this.currentState = NO_RAND_PART_A_STATE;
855            endBlock();
856            initBlock();
857            return setupBlock();
858        }
859    }
860
861    private int setupRandPartB() throws IOException {
862        if (this.su_ch2 != this.su_chPrev) {
863            this.currentState = RAND_PART_A_STATE;
864            this.su_count = 1;
865            return setupRandPartA();
866        } else if (++this.su_count >= 4) {
867            this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
868            this.su_tPos = this.data.tt[this.su_tPos];
869            if (this.su_rNToGo == 0) {
870                this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1;
871                if (++this.su_rTPos == 512) {
872                    this.su_rTPos = 0;
873                }
874            } else {
875                this.su_rNToGo--;
876            }
877            this.su_j2 = 0;
878            this.currentState = RAND_PART_C_STATE;
879            if (this.su_rNToGo == 1) {
880                this.su_z ^= 1;
881            }
882            return setupRandPartC();
883        } else {
884            this.currentState = RAND_PART_A_STATE;
885            return setupRandPartA();
886        }
887    }
888
889    private int setupRandPartC() throws IOException {
890        if (this.su_j2 < this.su_z) {
891            this.crc.updateCRC(this.su_ch2);
892            this.su_j2++;
893            return this.su_ch2;
894        } else {
895            this.currentState = RAND_PART_A_STATE;
896            this.su_i2++;
897            this.su_count = 0;
898            return setupRandPartA();
899        }
900    }
901
902    private int setupNoRandPartB() throws IOException {
903        if (this.su_ch2 != this.su_chPrev) {
904            this.su_count = 1;
905            return setupNoRandPartA();
906        } else if (++this.su_count >= 4) {
907            this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
908            this.su_tPos = this.data.tt[this.su_tPos];
909            this.su_j2 = 0;
910            return setupNoRandPartC();
911        } else {
912            return setupNoRandPartA();
913        }
914    }
915
916    private int setupNoRandPartC() throws IOException {
917        if (this.su_j2 < this.su_z) {
918            int su_ch2Shadow = this.su_ch2;
919            this.crc.updateCRC(su_ch2Shadow);
920            this.su_j2++;
921            this.currentState = NO_RAND_PART_C_STATE;
922            return su_ch2Shadow;
923        } else {
924            this.su_i2++;
925            this.su_count = 0;
926            return setupNoRandPartA();
927        }
928    }
929
930    private static final class Data {
931
932        // (with blockSize 900k)
933        final boolean[] inUse = new boolean[256]; // 256 byte
934
935        final byte[] seqToUnseq = new byte[256]; // 256 byte
936        final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
937        final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
938
939        /**
940         * Freq table collected to save a pass over the data during
941         * decompression.
942         */
943        final int[] unzftab = new int[256]; // 1024 byte
944
945        final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
946        final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
947        final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
948        final int[] minLens = new int[N_GROUPS]; // 24 byte
949
950        final int[] cftab = new int[257]; // 1028 byte
951        final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte
952        final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
953        // byte
954        final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte
955        // ---------------
956        // 60798 byte
957
958        int[] tt; // 3600000 byte
959        byte[] ll8; // 900000 byte
960
961        // ---------------
962        // 4560782 byte
963        // ===============
964
965        Data(int blockSize100k) {
966            this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE];
967        }
968
969        /**
970         * Initializes the {@link #tt} array.
971         * 
972         * This method is called when the required length of the array is known.
973         * I don't initialize it at construction time to avoid unneccessary
974         * memory allocation when compressing small files.
975         */
976        int[] initTT(int length) {
977            int[] ttShadow = this.tt;
978
979            // tt.length should always be >= length, but theoretically
980            // it can happen, if the compressor mixed small and large
981            // blocks. Normally only the last block will be smaller
982            // than others.
983            if ((ttShadow == null) || (ttShadow.length < length)) {
984                this.tt = ttShadow = new int[length];
985            }
986
987            return ttShadow;
988        }
989
990    }
991
992    /**
993     * Checks if the signature matches what is expected for a bzip2 file.
994     * 
995     * @param signature
996     *            the bytes to check
997     * @param length
998     *            the number of bytes to check
999     * @return true, if this stream is a bzip2 compressed stream, false otherwise
1000     * 
1001     * @since 1.1
1002     */
1003    public static boolean matches(byte[] signature, int length) {
1004
1005        if (length < 3) {
1006            return false;
1007        }
1008
1009        if (signature[0] != 'B') {
1010            return false;
1011        }
1012
1013        if (signature[1] != 'Z') {
1014            return false;
1015        }
1016
1017        if (signature[2] != 'h') {
1018            return false;
1019        }
1020
1021        return true;
1022    }
1023}