Mercurial > 510Connectbot
comparison src/com/jcraft/jzlib/Deflate.java @ 0:0ce5cc452d02
initial version
author | Carl Byington <carl@five-ten-sg.com> |
---|---|
date | Thu, 22 May 2014 10:41:19 -0700 |
parents | |
children | 46c2115ae1c8 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0ce5cc452d02 |
---|---|
1 /* -*-mode:java; c-basic-offset:2; -*- */ | |
2 /* | |
3 Copyright (c) 2000,2001,2002,2003 ymnk, JCraft,Inc. All rights reserved. | |
4 | |
5 Redistribution and use in source and binary forms, with or without | |
6 modification, are permitted provided that the following conditions are met: | |
7 | |
8 1. Redistributions of source code must retain the above copyright notice, | |
9 this list of conditions and the following disclaimer. | |
10 | |
11 2. Redistributions in binary form must reproduce the above copyright | |
12 notice, this list of conditions and the following disclaimer in | |
13 the documentation and/or other materials provided with the distribution. | |
14 | |
15 3. The names of the authors may not be used to endorse or promote products | |
16 derived from this software without specific prior written permission. | |
17 | |
18 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, | |
19 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | |
20 FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JCRAFT, | |
21 INC. OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT, | |
22 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, | |
24 OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, | |
27 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 */ | |
29 /* | |
30 * This program is based on zlib-1.1.3, so all credit should go authors | |
31 * Jean-loup Gailly(jloup@gzip.org) and Mark Adler(madler@alumni.caltech.edu) | |
32 * and contributors of zlib. | |
33 */ | |
34 | |
35 package com.jcraft.jzlib; | |
36 | |
37 public | |
38 final class Deflate { | |
39 | |
40 static final private int MAX_MEM_LEVEL = 9; | |
41 | |
42 static final private int Z_DEFAULT_COMPRESSION = -1; | |
43 | |
44 static final private int MAX_WBITS = 15; // 32K LZ77 window | |
45 static final private int DEF_MEM_LEVEL = 8; | |
46 | |
47 static class Config { | |
48 int good_length; // reduce lazy search above this match length | |
49 int max_lazy; // do not perform lazy search above this match length | |
50 int nice_length; // quit search above this match length | |
51 int max_chain; | |
52 int func; | |
53 Config(int good_length, int max_lazy, | |
54 int nice_length, int max_chain, int func) { | |
55 this.good_length = good_length; | |
56 this.max_lazy = max_lazy; | |
57 this.nice_length = nice_length; | |
58 this.max_chain = max_chain; | |
59 this.func = func; | |
60 } | |
61 } | |
62 | |
63 static final private int STORED = 0; | |
64 static final private int FAST = 1; | |
65 static final private int SLOW = 2; | |
66 static final private Config[] config_table; | |
67 static { | |
68 config_table = new Config[10]; | |
69 // good lazy nice chain | |
70 config_table[0] = new Config(0, 0, 0, 0, STORED); | |
71 config_table[1] = new Config(4, 4, 8, 4, FAST); | |
72 config_table[2] = new Config(4, 5, 16, 8, FAST); | |
73 config_table[3] = new Config(4, 6, 32, 32, FAST); | |
74 config_table[4] = new Config(4, 4, 16, 16, SLOW); | |
75 config_table[5] = new Config(8, 16, 32, 32, SLOW); | |
76 config_table[6] = new Config(8, 16, 128, 128, SLOW); | |
77 config_table[7] = new Config(8, 32, 128, 256, SLOW); | |
78 config_table[8] = new Config(32, 128, 258, 1024, SLOW); | |
79 config_table[9] = new Config(32, 258, 258, 4096, SLOW); | |
80 } | |
81 | |
82 static final private String[] z_errmsg = { | |
83 "need dictionary", // Z_NEED_DICT 2 | |
84 "stream end", // Z_STREAM_END 1 | |
85 "", // Z_OK 0 | |
86 "file error", // Z_ERRNO (-1) | |
87 "stream error", // Z_STREAM_ERROR (-2) | |
88 "data error", // Z_DATA_ERROR (-3) | |
89 "insufficient memory", // Z_MEM_ERROR (-4) | |
90 "buffer error", // Z_BUF_ERROR (-5) | |
91 "incompatible version",// Z_VERSION_ERROR (-6) | |
92 "" | |
93 }; | |
94 | |
95 // block not completed, need more input or more output | |
96 static final private int NeedMore = 0; | |
97 | |
98 // block flush performed | |
99 static final private int BlockDone = 1; | |
100 | |
101 // finish started, need only more output at next deflate | |
102 static final private int FinishStarted = 2; | |
103 | |
104 // finish done, accept no more input or output | |
105 static final private int FinishDone = 3; | |
106 | |
107 // preset dictionary flag in zlib header | |
108 static final private int PRESET_DICT = 0x20; | |
109 | |
110 static final private int Z_FILTERED = 1; | |
111 static final private int Z_HUFFMAN_ONLY = 2; | |
112 static final private int Z_DEFAULT_STRATEGY = 0; | |
113 | |
114 static final private int Z_NO_FLUSH = 0; | |
115 static final private int Z_PARTIAL_FLUSH = 1; | |
116 static final private int Z_SYNC_FLUSH = 2; | |
117 static final private int Z_FULL_FLUSH = 3; | |
118 static final private int Z_FINISH = 4; | |
119 | |
120 static final private int Z_OK = 0; | |
121 static final private int Z_STREAM_END = 1; | |
122 static final private int Z_NEED_DICT = 2; | |
123 static final private int Z_ERRNO = -1; | |
124 static final private int Z_STREAM_ERROR = -2; | |
125 static final private int Z_DATA_ERROR = -3; | |
126 static final private int Z_MEM_ERROR = -4; | |
127 static final private int Z_BUF_ERROR = -5; | |
128 static final private int Z_VERSION_ERROR = -6; | |
129 | |
130 static final private int INIT_STATE = 42; | |
131 static final private int BUSY_STATE = 113; | |
132 static final private int FINISH_STATE = 666; | |
133 | |
134 // The deflate compression method | |
135 static final private int Z_DEFLATED = 8; | |
136 | |
137 static final private int STORED_BLOCK = 0; | |
138 static final private int STATIC_TREES = 1; | |
139 static final private int DYN_TREES = 2; | |
140 | |
141 // The three kinds of block type | |
142 static final private int Z_BINARY = 0; | |
143 static final private int Z_ASCII = 1; | |
144 static final private int Z_UNKNOWN = 2; | |
145 | |
146 static final private int Buf_size = 8 * 2; | |
147 | |
148 // repeat previous bit length 3-6 times (2 bits of repeat count) | |
149 static final private int REP_3_6 = 16; | |
150 | |
151 // repeat a zero length 3-10 times (3 bits of repeat count) | |
152 static final private int REPZ_3_10 = 17; | |
153 | |
154 // repeat a zero length 11-138 times (7 bits of repeat count) | |
155 static final private int REPZ_11_138 = 18; | |
156 | |
157 static final private int MIN_MATCH = 3; | |
158 static final private int MAX_MATCH = 258; | |
159 static final private int MIN_LOOKAHEAD = (MAX_MATCH + MIN_MATCH + 1); | |
160 | |
161 static final private int MAX_BITS = 15; | |
162 static final private int D_CODES = 30; | |
163 static final private int BL_CODES = 19; | |
164 static final private int LENGTH_CODES = 29; | |
165 static final private int LITERALS = 256; | |
166 static final private int L_CODES = (LITERALS + 1 + LENGTH_CODES); | |
167 static final private int HEAP_SIZE = (2 * L_CODES + 1); | |
168 | |
169 static final private int END_BLOCK = 256; | |
170 | |
171 ZStream strm; // pointer back to this zlib stream | |
172 int status; // as the name implies | |
173 byte[] pending_buf; // output still pending | |
174 int pending_buf_size; // size of pending_buf | |
175 int pending_out; // next pending byte to output to the stream | |
176 int pending; // nb of bytes in the pending buffer | |
177 int noheader; // suppress zlib header and adler32 | |
178 byte data_type; // UNKNOWN, BINARY or ASCII | |
179 byte method; // STORED (for zip only) or DEFLATED | |
180 int last_flush; // value of flush param for previous deflate call | |
181 | |
182 int w_size; // LZ77 window size (32K by default) | |
183 int w_bits; // log2(w_size) (8..16) | |
184 int w_mask; // w_size - 1 | |
185 | |
186 byte[] window; | |
187 // Sliding window. Input bytes are read into the second half of the window, | |
188 // and move to the first half later to keep a dictionary of at least wSize | |
189 // bytes. With this organization, matches are limited to a distance of | |
190 // wSize-MAX_MATCH bytes, but this ensures that IO is always | |
191 // performed with a length multiple of the block size. Also, it limits | |
192 // the window size to 64K, which is quite useful on MSDOS. | |
193 // To do: use the user input buffer as sliding window. | |
194 | |
195 int window_size; | |
196 // Actual size of window: 2*wSize, except when the user input buffer | |
197 // is directly used as sliding window. | |
198 | |
199 short[] prev; | |
200 // Link to older string with same hash index. To limit the size of this | |
201 // array to 64K, this link is maintained only for the last 32K strings. | |
202 // An index in this array is thus a window index modulo 32K. | |
203 | |
204 short[] head; // Heads of the hash chains or NIL. | |
205 | |
206 int ins_h; // hash index of string to be inserted | |
207 int hash_size; // number of elements in hash table | |
208 int hash_bits; // log2(hash_size) | |
209 int hash_mask; // hash_size-1 | |
210 | |
211 // Number of bits by which ins_h must be shifted at each input | |
212 // step. It must be such that after MIN_MATCH steps, the oldest | |
213 // byte no longer takes part in the hash key, that is: | |
214 // hash_shift * MIN_MATCH >= hash_bits | |
215 int hash_shift; | |
216 | |
217 // Window position at the beginning of the current output block. Gets | |
218 // negative when the window is moved backwards. | |
219 | |
220 int block_start; | |
221 | |
222 int match_length; // length of best match | |
223 int prev_match; // previous match | |
224 int match_available; // set if previous match exists | |
225 int strstart; // start of string to insert | |
226 int match_start; // start of matching string | |
227 int lookahead; // number of valid bytes ahead in window | |
228 | |
229 // Length of the best match at previous step. Matches not greater than this | |
230 // are discarded. This is used in the lazy match evaluation. | |
231 int prev_length; | |
232 | |
233 // To speed up deflation, hash chains are never searched beyond this | |
234 // length. A higher limit improves compression ratio but degrades the speed. | |
235 int max_chain_length; | |
236 | |
237 // Attempt to find a better match only when the current match is strictly | |
238 // smaller than this value. This mechanism is used only for compression | |
239 // levels >= 4. | |
240 int max_lazy_match; | |
241 | |
242 // Insert new strings in the hash table only if the match length is not | |
243 // greater than this length. This saves time but degrades compression. | |
244 // max_insert_length is used only for compression levels <= 3. | |
245 | |
246 int level; // compression level (1..9) | |
247 int strategy; // favor or force Huffman coding | |
248 | |
249 // Use a faster search when the previous match is longer than this | |
250 int good_match; | |
251 | |
252 // Stop searching when current match exceeds this | |
253 int nice_match; | |
254 | |
255 short[] dyn_ltree; // literal and length tree | |
256 short[] dyn_dtree; // distance tree | |
257 short[] bl_tree; // Huffman tree for bit lengths | |
258 | |
259 Tree l_desc = new Tree(); // desc for literal tree | |
260 Tree d_desc = new Tree(); // desc for distance tree | |
261 Tree bl_desc = new Tree(); // desc for bit length tree | |
262 | |
263 // number of codes at each bit length for an optimal tree | |
264 short[] bl_count = new short[MAX_BITS + 1]; | |
265 | |
266 // heap used to build the Huffman trees | |
267 int[] heap = new int[2 * L_CODES + 1]; | |
268 | |
269 int heap_len; // number of elements in the heap | |
270 int heap_max; // element of largest frequency | |
271 // The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. | |
272 // The same heap array is used to build all trees. | |
273 | |
274 // Depth of each subtree used as tie breaker for trees of equal frequency | |
275 byte[] depth = new byte[2 * L_CODES + 1]; | |
276 | |
277 int l_buf; // index for literals or lengths */ | |
278 | |
279 // Size of match buffer for literals/lengths. There are 4 reasons for | |
280 // limiting lit_bufsize to 64K: | |
281 // - frequencies can be kept in 16 bit counters | |
282 // - if compression is not successful for the first block, all input | |
283 // data is still in the window so we can still emit a stored block even | |
284 // when input comes from standard input. (This can also be done for | |
285 // all blocks if lit_bufsize is not greater than 32K.) | |
286 // - if compression is not successful for a file smaller than 64K, we can | |
287 // even emit a stored file instead of a stored block (saving 5 bytes). | |
288 // This is applicable only for zip (not gzip or zlib). | |
289 // - creating new Huffman trees less frequently may not provide fast | |
290 // adaptation to changes in the input data statistics. (Take for | |
291 // example a binary file with poorly compressible code followed by | |
292 // a highly compressible string table.) Smaller buffer sizes give | |
293 // fast adaptation but have of course the overhead of transmitting | |
294 // trees more frequently. | |
295 // - I can't count above 4 | |
296 int lit_bufsize; | |
297 | |
298 int last_lit; // running index in l_buf | |
299 | |
300 // Buffer for distances. To simplify the code, d_buf and l_buf have | |
301 // the same number of elements. To use different lengths, an extra flag | |
302 // array would be necessary. | |
303 | |
304 int d_buf; // index of pendig_buf | |
305 | |
306 int opt_len; // bit length of current block with optimal trees | |
307 int static_len; // bit length of current block with static trees | |
308 int matches; // number of string matches in current block | |
309 int last_eob_len; // bit length of EOB code for last block | |
310 | |
311 // Output buffer. bits are inserted starting at the bottom (least | |
312 // significant bits). | |
313 short bi_buf; | |
314 | |
315 // Number of valid bits in bi_buf. All bits above the last valid bit | |
316 // are always zero. | |
317 int bi_valid; | |
318 | |
319 Deflate() { | |
320 dyn_ltree = new short[HEAP_SIZE * 2]; | |
321 dyn_dtree = new short[(2 * D_CODES + 1) * 2]; // distance tree | |
322 bl_tree = new short[(2 * BL_CODES + 1) * 2]; // Huffman tree for bit lengths | |
323 } | |
324 | |
325 void lm_init() { | |
326 window_size = 2 * w_size; | |
327 head[hash_size - 1] = 0; | |
328 | |
329 for (int i = 0; i < hash_size - 1; i++) { | |
330 head[i] = 0; | |
331 } | |
332 | |
333 // Set the default configuration parameters: | |
334 max_lazy_match = Deflate.config_table[level].max_lazy; | |
335 good_match = Deflate.config_table[level].good_length; | |
336 nice_match = Deflate.config_table[level].nice_length; | |
337 max_chain_length = Deflate.config_table[level].max_chain; | |
338 strstart = 0; | |
339 block_start = 0; | |
340 lookahead = 0; | |
341 match_length = prev_length = MIN_MATCH - 1; | |
342 match_available = 0; | |
343 ins_h = 0; | |
344 } | |
345 | |
346 // Initialize the tree data structures for a new zlib stream. | |
347 void tr_init() { | |
348 l_desc.dyn_tree = dyn_ltree; | |
349 l_desc.stat_desc = StaticTree.static_l_desc; | |
350 d_desc.dyn_tree = dyn_dtree; | |
351 d_desc.stat_desc = StaticTree.static_d_desc; | |
352 bl_desc.dyn_tree = bl_tree; | |
353 bl_desc.stat_desc = StaticTree.static_bl_desc; | |
354 bi_buf = 0; | |
355 bi_valid = 0; | |
356 last_eob_len = 8; // enough lookahead for inflate | |
357 // Initialize the first block of the first file: | |
358 init_block(); | |
359 } | |
360 | |
361 void init_block() { | |
362 // Initialize the trees. | |
363 for (int i = 0; i < L_CODES; i++) dyn_ltree[i * 2] = 0; | |
364 | |
365 for (int i = 0; i < D_CODES; i++) dyn_dtree[i * 2] = 0; | |
366 | |
367 for (int i = 0; i < BL_CODES; i++) bl_tree[i * 2] = 0; | |
368 | |
369 dyn_ltree[END_BLOCK * 2] = 1; | |
370 opt_len = static_len = 0; | |
371 last_lit = matches = 0; | |
372 } | |
373 | |
374 // Restore the heap property by moving down the tree starting at node k, | |
375 // exchanging a node with the smallest of its two sons if necessary, stopping | |
376 // when the heap property is re-established (each father smaller than its | |
377 // two sons). | |
378 void pqdownheap(short[] tree, // the tree to restore | |
379 int k // node to move down | |
380 ) { | |
381 int v = heap[k]; | |
382 int j = k << 1; // left son of k | |
383 | |
384 while (j <= heap_len) { | |
385 // Set j to the smallest of the two sons: | |
386 if (j < heap_len && | |
387 smaller(tree, heap[j + 1], heap[j], depth)) { | |
388 j++; | |
389 } | |
390 | |
391 // Exit if v is smaller than both sons | |
392 if (smaller(tree, v, heap[j], depth)) break; | |
393 | |
394 // Exchange v with the smallest son | |
395 heap[k] = heap[j]; k = j; | |
396 // And continue down the tree, setting j to the left son of k | |
397 j <<= 1; | |
398 } | |
399 | |
400 heap[k] = v; | |
401 } | |
402 | |
403 static boolean smaller(short[] tree, int n, int m, byte[] depth) { | |
404 short tn2 = tree[n * 2]; | |
405 short tm2 = tree[m * 2]; | |
406 return (tn2 < tm2 || | |
407 (tn2 == tm2 && depth[n] <= depth[m])); | |
408 } | |
409 | |
410 // Scan a literal or distance tree to determine the frequencies of the codes | |
411 // in the bit length tree. | |
412 void scan_tree(short[] tree, // the tree to be scanned | |
413 int max_code // and its largest code of non zero frequency | |
414 ) { | |
415 int n; // iterates over all tree elements | |
416 int prevlen = -1; // last emitted length | |
417 int curlen; // length of current code | |
418 int nextlen = tree[0 * 2 + 1]; // length of next code | |
419 int count = 0; // repeat count of the current code | |
420 int max_count = 7; // max repeat count | |
421 int min_count = 4; // min repeat count | |
422 | |
423 if (nextlen == 0) { max_count = 138; min_count = 3; } | |
424 | |
425 tree[(max_code + 1) * 2 + 1] = (short)0xffff; // guard | |
426 | |
427 for (n = 0; n <= max_code; n++) { | |
428 curlen = nextlen; nextlen = tree[(n + 1) * 2 + 1]; | |
429 | |
430 if (++count < max_count && curlen == nextlen) { | |
431 continue; | |
432 } | |
433 else if (count < min_count) { | |
434 bl_tree[curlen * 2] += count; | |
435 } | |
436 else if (curlen != 0) { | |
437 if (curlen != prevlen) bl_tree[curlen * 2]++; | |
438 | |
439 bl_tree[REP_3_6 * 2]++; | |
440 } | |
441 else if (count <= 10) { | |
442 bl_tree[REPZ_3_10 * 2]++; | |
443 } | |
444 else { | |
445 bl_tree[REPZ_11_138 * 2]++; | |
446 } | |
447 | |
448 count = 0; prevlen = curlen; | |
449 | |
450 if (nextlen == 0) { | |
451 max_count = 138; min_count = 3; | |
452 } | |
453 else if (curlen == nextlen) { | |
454 max_count = 6; min_count = 3; | |
455 } | |
456 else { | |
457 max_count = 7; min_count = 4; | |
458 } | |
459 } | |
460 } | |
461 | |
462 // Construct the Huffman tree for the bit lengths and return the index in | |
463 // bl_order of the last bit length code to send. | |
464 int build_bl_tree() { | |
465 int max_blindex; // index of last bit length code of non zero freq | |
466 // Determine the bit length frequencies for literal and distance trees | |
467 scan_tree(dyn_ltree, l_desc.max_code); | |
468 scan_tree(dyn_dtree, d_desc.max_code); | |
469 // Build the bit length tree: | |
470 bl_desc.build_tree(this); | |
471 | |
472 // opt_len now includes the length of the tree representations, except | |
473 // the lengths of the bit lengths codes and the 5+5+4 bits for the counts. | |
474 // Determine the number of bit length codes to send. The pkzip format | |
475 // requires that at least 4 bit length codes be sent. (appnote.txt says | |
476 // 3 but the actual value used is 4.) | |
477 for (max_blindex = BL_CODES - 1; max_blindex >= 3; max_blindex--) { | |
478 if (bl_tree[Tree.bl_order[max_blindex] * 2 + 1] != 0) break; | |
479 } | |
480 | |
481 // Update opt_len to include the bit length tree and counts | |
482 opt_len += 3 * (max_blindex + 1) + 5 + 5 + 4; | |
483 return max_blindex; | |
484 } | |
485 | |
486 | |
487 // Send the header for a block using dynamic Huffman trees: the counts, the | |
488 // lengths of the bit length codes, the literal tree and the distance tree. | |
489 // IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. | |
490 void send_all_trees(int lcodes, int dcodes, int blcodes) { | |
491 int rank; // index in bl_order | |
492 send_bits(lcodes - 257, 5); // not +255 as stated in appnote.txt | |
493 send_bits(dcodes - 1, 5); | |
494 send_bits(blcodes - 4, 4); // not -3 as stated in appnote.txt | |
495 | |
496 for (rank = 0; rank < blcodes; rank++) { | |
497 send_bits(bl_tree[Tree.bl_order[rank] * 2 + 1], 3); | |
498 } | |
499 | |
500 send_tree(dyn_ltree, lcodes - 1); // literal tree | |
501 send_tree(dyn_dtree, dcodes - 1); // distance tree | |
502 } | |
503 | |
504 // Send a literal or distance tree in compressed form, using the codes in | |
505 // bl_tree. | |
506 void send_tree(short[] tree, // the tree to be sent | |
507 int max_code // and its largest code of non zero frequency | |
508 ) { | |
509 int n; // iterates over all tree elements | |
510 int prevlen = -1; // last emitted length | |
511 int curlen; // length of current code | |
512 int nextlen = tree[0 * 2 + 1]; // length of next code | |
513 int count = 0; // repeat count of the current code | |
514 int max_count = 7; // max repeat count | |
515 int min_count = 4; // min repeat count | |
516 | |
517 if (nextlen == 0) { max_count = 138; min_count = 3; } | |
518 | |
519 for (n = 0; n <= max_code; n++) { | |
520 curlen = nextlen; nextlen = tree[(n + 1) * 2 + 1]; | |
521 | |
522 if (++count < max_count && curlen == nextlen) { | |
523 continue; | |
524 } | |
525 else if (count < min_count) { | |
526 do { send_code(curlen, bl_tree); } | |
527 while (--count != 0); | |
528 } | |
529 else if (curlen != 0) { | |
530 if (curlen != prevlen) { | |
531 send_code(curlen, bl_tree); count--; | |
532 } | |
533 | |
534 send_code(REP_3_6, bl_tree); | |
535 send_bits(count - 3, 2); | |
536 } | |
537 else if (count <= 10) { | |
538 send_code(REPZ_3_10, bl_tree); | |
539 send_bits(count - 3, 3); | |
540 } | |
541 else { | |
542 send_code(REPZ_11_138, bl_tree); | |
543 send_bits(count - 11, 7); | |
544 } | |
545 | |
546 count = 0; prevlen = curlen; | |
547 | |
548 if (nextlen == 0) { | |
549 max_count = 138; min_count = 3; | |
550 } | |
551 else if (curlen == nextlen) { | |
552 max_count = 6; min_count = 3; | |
553 } | |
554 else { | |
555 max_count = 7; min_count = 4; | |
556 } | |
557 } | |
558 } | |
559 | |
560 // Output a byte on the stream. | |
561 // IN assertion: there is enough room in pending_buf. | |
562 final void put_byte(byte[] p, int start, int len) { | |
563 System.arraycopy(p, start, pending_buf, pending, len); | |
564 pending += len; | |
565 } | |
566 | |
567 final void put_byte(byte c) { | |
568 pending_buf[pending++] = c; | |
569 } | |
570 final void put_short(int w) { | |
571 put_byte((byte)(w/*&0xff*/)); | |
572 put_byte((byte)(w >>> 8)); | |
573 } | |
574 final void putShortMSB(int b) { | |
575 put_byte((byte)(b >> 8)); | |
576 put_byte((byte)(b/*&0xff*/)); | |
577 } | |
578 | |
579 final void send_code(int c, short[] tree) { | |
580 int c2 = c * 2; | |
581 send_bits((tree[c2] & 0xffff), (tree[c2 + 1] & 0xffff)); | |
582 } | |
583 | |
584 void send_bits(int value, int length) { | |
585 int len = length; | |
586 | |
587 if (bi_valid > (int)Buf_size - len) { | |
588 int val = value; | |
589 // bi_buf |= (val << bi_valid); | |
590 bi_buf |= ((val << bi_valid) & 0xffff); | |
591 put_short(bi_buf); | |
592 bi_buf = (short)(val >>> (Buf_size - bi_valid)); | |
593 bi_valid += len - Buf_size; | |
594 } | |
595 else { | |
596 // bi_buf |= (value) << bi_valid; | |
597 bi_buf |= (((value) << bi_valid) & 0xffff); | |
598 bi_valid += len; | |
599 } | |
600 } | |
601 | |
602 // Send one empty static block to give enough lookahead for inflate. | |
603 // This takes 10 bits, of which 7 may remain in the bit buffer. | |
604 // The current inflate code requires 9 bits of lookahead. If the | |
605 // last two codes for the previous block (real code plus EOB) were coded | |
606 // on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode | |
607 // the last real code. In this case we send two empty static blocks instead | |
608 // of one. (There are no problems if the previous block is stored or fixed.) | |
609 // To simplify the code, we assume the worst case of last real code encoded | |
610 // on one bit only. | |
611 void _tr_align() { | |
612 send_bits(STATIC_TREES << 1, 3); | |
613 send_code(END_BLOCK, StaticTree.static_ltree); | |
614 bi_flush(); | |
615 | |
616 // Of the 10 bits for the empty block, we have already sent | |
617 // (10 - bi_valid) bits. The lookahead for the last real code (before | |
618 // the EOB of the previous block) was thus at least one plus the length | |
619 // of the EOB plus what we have just sent of the empty static block. | |
620 if (1 + last_eob_len + 10 - bi_valid < 9) { | |
621 send_bits(STATIC_TREES << 1, 3); | |
622 send_code(END_BLOCK, StaticTree.static_ltree); | |
623 bi_flush(); | |
624 } | |
625 | |
626 last_eob_len = 7; | |
627 } | |
628 | |
629 | |
630 // Save the match info and tally the frequency counts. Return true if | |
631 // the current block must be flushed. | |
632 boolean _tr_tally(int dist, // distance of matched string | |
633 int lc // match length-MIN_MATCH or unmatched char (if dist==0) | |
634 ) { | |
635 pending_buf[d_buf + last_lit * 2] = (byte)(dist >>> 8); | |
636 pending_buf[d_buf + last_lit * 2 + 1] = (byte)dist; | |
637 pending_buf[l_buf + last_lit] = (byte)lc; last_lit++; | |
638 | |
639 if (dist == 0) { | |
640 // lc is the unmatched char | |
641 dyn_ltree[lc * 2]++; | |
642 } | |
643 else { | |
644 matches++; | |
645 // Here, lc is the match length - MIN_MATCH | |
646 dist--; // dist = match distance - 1 | |
647 dyn_ltree[(Tree._length_code[lc] + LITERALS + 1) * 2]++; | |
648 dyn_dtree[Tree.d_code(dist) * 2]++; | |
649 } | |
650 | |
651 if ((last_lit & 0x1fff) == 0 && level > 2) { | |
652 // Compute an upper bound for the compressed length | |
653 int out_length = last_lit * 8; | |
654 int in_length = strstart - block_start; | |
655 int dcode; | |
656 | |
657 for (dcode = 0; dcode < D_CODES; dcode++) { | |
658 out_length += (int)dyn_dtree[dcode * 2] * | |
659 (5L + Tree.extra_dbits[dcode]); | |
660 } | |
661 | |
662 out_length >>>= 3; | |
663 | |
664 if ((matches < (last_lit / 2)) && out_length < in_length / 2) return true; | |
665 } | |
666 | |
667 return (last_lit == lit_bufsize - 1); | |
668 // We avoid equality with lit_bufsize because of wraparound at 64K | |
669 // on 16 bit machines and because stored blocks are restricted to | |
670 // 64K-1 bytes. | |
671 } | |
672 | |
673 // Send the block data compressed using the given Huffman trees | |
674 void compress_block(short[] ltree, short[] dtree) { | |
675 int dist; // distance of matched string | |
676 int lc; // match length or unmatched char (if dist == 0) | |
677 int lx = 0; // running index in l_buf | |
678 int code; // the code to send | |
679 int extra; // number of extra bits to send | |
680 | |
681 if (last_lit != 0) { | |
682 do { | |
683 dist = ((pending_buf[d_buf + lx * 2] << 8) & 0xff00) | | |
684 (pending_buf[d_buf + lx * 2 + 1] & 0xff); | |
685 lc = (pending_buf[l_buf + lx]) & 0xff; lx++; | |
686 | |
687 if (dist == 0) { | |
688 send_code(lc, ltree); // send a literal byte | |
689 } | |
690 else { | |
691 // Here, lc is the match length - MIN_MATCH | |
692 code = Tree._length_code[lc]; | |
693 send_code(code + LITERALS + 1, ltree); // send the length code | |
694 extra = Tree.extra_lbits[code]; | |
695 | |
696 if (extra != 0) { | |
697 lc -= Tree.base_length[code]; | |
698 send_bits(lc, extra); // send the extra length bits | |
699 } | |
700 | |
701 dist--; // dist is now the match distance - 1 | |
702 code = Tree.d_code(dist); | |
703 send_code(code, dtree); // send the distance code | |
704 extra = Tree.extra_dbits[code]; | |
705 | |
706 if (extra != 0) { | |
707 dist -= Tree.base_dist[code]; | |
708 send_bits(dist, extra); // send the extra distance bits | |
709 } | |
710 } // literal or match pair ? | |
711 | |
712 // Check that the overlay between pending_buf and d_buf+l_buf is ok: | |
713 } | |
714 while (lx < last_lit); | |
715 } | |
716 | |
717 send_code(END_BLOCK, ltree); | |
718 last_eob_len = ltree[END_BLOCK * 2 + 1]; | |
719 } | |
720 | |
721 // Set the data type to ASCII or BINARY, using a crude approximation: | |
722 // binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. | |
723 // IN assertion: the fields freq of dyn_ltree are set and the total of all | |
724 // frequencies does not exceed 64K (to fit in an int on 16 bit machines). | |
725 void set_data_type() { | |
726 int n = 0; | |
727 int ascii_freq = 0; | |
728 int bin_freq = 0; | |
729 | |
730 while (n < 7) { bin_freq += dyn_ltree[n * 2]; n++;} | |
731 | |
732 while (n < 128) { ascii_freq += dyn_ltree[n * 2]; n++;} | |
733 | |
734 while (n < LITERALS) { bin_freq += dyn_ltree[n * 2]; n++;} | |
735 | |
736 data_type = (byte)(bin_freq > (ascii_freq >>> 2) ? Z_BINARY : Z_ASCII); | |
737 } | |
738 | |
739 // Flush the bit buffer, keeping at most 7 bits in it. | |
740 void bi_flush() { | |
741 if (bi_valid == 16) { | |
742 put_short(bi_buf); | |
743 bi_buf = 0; | |
744 bi_valid = 0; | |
745 } | |
746 else if (bi_valid >= 8) { | |
747 put_byte((byte)bi_buf); | |
748 bi_buf >>>= 8; | |
749 bi_valid -= 8; | |
750 } | |
751 } | |
752 | |
753 // Flush the bit buffer and align the output on a byte boundary | |
754 void bi_windup() { | |
755 if (bi_valid > 8) { | |
756 put_short(bi_buf); | |
757 } | |
758 else if (bi_valid > 0) { | |
759 put_byte((byte)bi_buf); | |
760 } | |
761 | |
762 bi_buf = 0; | |
763 bi_valid = 0; | |
764 } | |
765 | |
766 // Copy a stored block, storing first the length and its | |
767 // one's complement if requested. | |
768 void copy_block(int buf, // the input data | |
769 int len, // its length | |
770 boolean header // true if block header must be written | |
771 ) { | |
772 int index = 0; | |
773 bi_windup(); // align on byte boundary | |
774 last_eob_len = 8; // enough lookahead for inflate | |
775 | |
776 if (header) { | |
777 put_short((short)len); | |
778 put_short((short)~len); | |
779 } | |
780 | |
781 // while(len--!=0) { | |
782 // put_byte(window[buf+index]); | |
783 // index++; | |
784 // } | |
785 put_byte(window, buf, len); | |
786 } | |
787 | |
788 void flush_block_only(boolean eof) { | |
789 _tr_flush_block(block_start >= 0 ? block_start : -1, | |
790 strstart - block_start, | |
791 eof); | |
792 block_start = strstart; | |
793 strm.flush_pending(); | |
794 } | |
795 | |
796 // Copy without compression as much as possible from the input stream, return | |
797 // the current block state. | |
798 // This function does not insert new strings in the dictionary since | |
799 // uncompressible data is probably not useful. This function is used | |
800 // only for the level=0 compression option. | |
801 // NOTE: this function should be optimized to avoid extra copying from | |
802 // window to pending_buf. | |
803 int deflate_stored(int flush) { | |
804 // Stored blocks are limited to 0xffff bytes, pending_buf is limited | |
805 // to pending_buf_size, and each stored block has a 5 byte header: | |
806 int max_block_size = 0xffff; | |
807 int max_start; | |
808 | |
809 if (max_block_size > pending_buf_size - 5) { | |
810 max_block_size = pending_buf_size - 5; | |
811 } | |
812 | |
813 // Copy as much as possible from input to output: | |
814 while (true) { | |
815 // Fill the window as much as possible: | |
816 if (lookahead <= 1) { | |
817 fill_window(); | |
818 | |
819 if (lookahead == 0 && flush == Z_NO_FLUSH) return NeedMore; | |
820 | |
821 if (lookahead == 0) break; // flush the current block | |
822 } | |
823 | |
824 strstart += lookahead; | |
825 lookahead = 0; | |
826 // Emit a stored block if pending_buf will be full: | |
827 max_start = block_start + max_block_size; | |
828 | |
829 if (strstart == 0 || strstart >= max_start) { | |
830 // strstart == 0 is possible when wraparound on 16-bit machine | |
831 lookahead = (int)(strstart - max_start); | |
832 strstart = (int)max_start; | |
833 flush_block_only(false); | |
834 | |
835 if (strm.avail_out == 0) return NeedMore; | |
836 } | |
837 | |
838 // Flush if we may have to slide, otherwise block_start may become | |
839 // negative and the data will be gone: | |
840 if (strstart - block_start >= w_size - MIN_LOOKAHEAD) { | |
841 flush_block_only(false); | |
842 | |
843 if (strm.avail_out == 0) return NeedMore; | |
844 } | |
845 } | |
846 | |
847 flush_block_only(flush == Z_FINISH); | |
848 | |
849 if (strm.avail_out == 0) | |
850 return (flush == Z_FINISH) ? FinishStarted : NeedMore; | |
851 | |
852 return flush == Z_FINISH ? FinishDone : BlockDone; | |
853 } | |
854 | |
855 // Send a stored block | |
856 void _tr_stored_block(int buf, // input block | |
857 int stored_len, // length of input block | |
858 boolean eof // true if this is the last block for a file | |
859 ) { | |
860 send_bits((STORED_BLOCK << 1) + (eof ? 1 : 0), 3); // send block type | |
861 copy_block(buf, stored_len, true); // with header | |
862 } | |
863 | |
864 // Determine the best encoding for the current block: dynamic trees, static | |
865 // trees or store, and output the encoded block to the zip file. | |
866 void _tr_flush_block(int buf, // input block, or NULL if too old | |
867 int stored_len, // length of input block | |
868 boolean eof // true if this is the last block for a file | |
869 ) { | |
870 int opt_lenb, static_lenb;// opt_len and static_len in bytes | |
871 int max_blindex = 0; // index of last bit length code of non zero freq | |
872 | |
873 // Build the Huffman trees unless a stored block is forced | |
874 if (level > 0) { | |
875 // Check if the file is ascii or binary | |
876 if (data_type == Z_UNKNOWN) set_data_type(); | |
877 | |
878 // Construct the literal and distance trees | |
879 l_desc.build_tree(this); | |
880 d_desc.build_tree(this); | |
881 // At this point, opt_len and static_len are the total bit lengths of | |
882 // the compressed block data, excluding the tree representations. | |
883 // Build the bit length tree for the above two trees, and get the index | |
884 // in bl_order of the last bit length code to send. | |
885 max_blindex = build_bl_tree(); | |
886 // Determine the best encoding. Compute first the block length in bytes | |
887 opt_lenb = (opt_len + 3 + 7) >>> 3; | |
888 static_lenb = (static_len + 3 + 7) >>> 3; | |
889 | |
890 if (static_lenb <= opt_lenb) opt_lenb = static_lenb; | |
891 } | |
892 else { | |
893 opt_lenb = static_lenb = stored_len + 5; // force a stored block | |
894 } | |
895 | |
896 if (stored_len + 4 <= opt_lenb && buf != -1) { | |
897 // 4: two words for the lengths | |
898 // The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. | |
899 // Otherwise we can't have processed more than WSIZE input bytes since | |
900 // the last block flush, because compression would have been | |
901 // successful. If LIT_BUFSIZE <= WSIZE, it is never too late to | |
902 // transform a block into a stored block. | |
903 _tr_stored_block(buf, stored_len, eof); | |
904 } | |
905 else if (static_lenb == opt_lenb) { | |
906 send_bits((STATIC_TREES << 1) + (eof ? 1 : 0), 3); | |
907 compress_block(StaticTree.static_ltree, StaticTree.static_dtree); | |
908 } | |
909 else { | |
910 send_bits((DYN_TREES << 1) + (eof ? 1 : 0), 3); | |
911 send_all_trees(l_desc.max_code + 1, d_desc.max_code + 1, max_blindex + 1); | |
912 compress_block(dyn_ltree, dyn_dtree); | |
913 } | |
914 | |
915 // The above check is made mod 2^32, for files larger than 512 MB | |
916 // and uLong implemented on 32 bits. | |
917 init_block(); | |
918 | |
919 if (eof) { | |
920 bi_windup(); | |
921 } | |
922 } | |
923 | |
924 // Fill the window when the lookahead becomes insufficient. | |
925 // Updates strstart and lookahead. | |
926 // | |
927 // IN assertion: lookahead < MIN_LOOKAHEAD | |
928 // OUT assertions: strstart <= window_size-MIN_LOOKAHEAD | |
929 // At least one byte has been read, or avail_in == 0; reads are | |
930 // performed for at least two bytes (required for the zip translate_eol | |
931 // option -- not supported here). | |
932 void fill_window() { | |
933 int n, m; | |
934 int p; | |
935 int more; // Amount of free space at the end of the window. | |
936 | |
937 do { | |
938 more = (window_size - lookahead - strstart); | |
939 | |
940 // Deal with !@#$% 64K limit: | |
941 if (more == 0 && strstart == 0 && lookahead == 0) { | |
942 more = w_size; | |
943 } | |
944 else if (more == -1) { | |
945 // Very unlikely, but possible on 16 bit machine if strstart == 0 | |
946 // and lookahead == 1 (input done one byte at time) | |
947 more--; | |
948 // If the window is almost full and there is insufficient lookahead, | |
949 // move the upper half to the lower one to make room in the upper half. | |
950 } | |
951 else if (strstart >= w_size + w_size - MIN_LOOKAHEAD) { | |
952 System.arraycopy(window, w_size, window, 0, w_size); | |
953 match_start -= w_size; | |
954 strstart -= w_size; // we now have strstart >= MAX_DIST | |
955 block_start -= w_size; | |
956 // Slide the hash table (could be avoided with 32 bit values | |
957 // at the expense of memory usage). We slide even when level == 0 | |
958 // to keep the hash table consistent if we switch back to level > 0 | |
959 // later. (Using level 0 permanently is not an optimal usage of | |
960 // zlib, so we don't care about this pathological case.) | |
961 n = hash_size; | |
962 p = n; | |
963 | |
964 do { | |
965 m = (head[--p] & 0xffff); | |
966 head[p] = (m >= w_size ? (short)(m - w_size) : 0); | |
967 } | |
968 while (--n != 0); | |
969 | |
970 n = w_size; | |
971 p = n; | |
972 | |
973 do { | |
974 m = (prev[--p] & 0xffff); | |
975 prev[p] = (m >= w_size ? (short)(m - w_size) : 0); | |
976 // If n is not on any hash chain, prev[n] is garbage but | |
977 // its value will never be used. | |
978 } | |
979 while (--n != 0); | |
980 | |
981 more += w_size; | |
982 } | |
983 | |
984 if (strm.avail_in == 0) return; | |
985 | |
986 // If there was no sliding: | |
987 // strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && | |
988 // more == window_size - lookahead - strstart | |
989 // => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) | |
990 // => more >= window_size - 2*WSIZE + 2 | |
991 // In the BIG_MEM or MMAP case (not yet supported), | |
992 // window_size == input_size + MIN_LOOKAHEAD && | |
993 // strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. | |
994 // Otherwise, window_size == 2*WSIZE so more >= 2. | |
995 // If there was sliding, more >= WSIZE. So in all cases, more >= 2. | |
996 n = strm.read_buf(window, strstart + lookahead, more); | |
997 lookahead += n; | |
998 | |
999 // Initialize the hash value now that we have some input: | |
1000 if (lookahead >= MIN_MATCH) { | |
1001 ins_h = window[strstart] & 0xff; | |
1002 ins_h = (((ins_h) << hash_shift) ^ (window[strstart + 1] & 0xff)) & hash_mask; | |
1003 } | |
1004 | |
1005 // If the whole input has less than MIN_MATCH bytes, ins_h is garbage, | |
1006 // but this is not important since only literal bytes will be emitted. | |
1007 } | |
1008 while (lookahead < MIN_LOOKAHEAD && strm.avail_in != 0); | |
1009 } | |
1010 | |
1011 // Compress as much as possible from the input stream, return the current | |
1012 // block state. | |
1013 // This function does not perform lazy evaluation of matches and inserts | |
1014 // new strings in the dictionary only for unmatched strings or for short | |
1015 // matches. It is used only for the fast compression options. | |
1016 int deflate_fast(int flush) { | |
1017 // short hash_head = 0; // head of the hash chain | |
1018 int hash_head = 0; // head of the hash chain | |
1019 boolean bflush; // set if current block must be flushed | |
1020 | |
1021 while (true) { | |
1022 // Make sure that we always have enough lookahead, except | |
1023 // at the end of the input file. We need MAX_MATCH bytes | |
1024 // for the next match, plus MIN_MATCH bytes to insert the | |
1025 // string following the next match. | |
1026 if (lookahead < MIN_LOOKAHEAD) { | |
1027 fill_window(); | |
1028 | |
1029 if (lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { | |
1030 return NeedMore; | |
1031 } | |
1032 | |
1033 if (lookahead == 0) break; // flush the current block | |
1034 } | |
1035 | |
1036 // Insert the string window[strstart .. strstart+2] in the | |
1037 // dictionary, and set hash_head to the head of the hash chain: | |
1038 if (lookahead >= MIN_MATCH) { | |
1039 ins_h = (((ins_h) << hash_shift) ^ (window[(strstart) + (MIN_MATCH - 1)] & 0xff)) & hash_mask; | |
1040 // prev[strstart&w_mask]=hash_head=head[ins_h]; | |
1041 hash_head = (head[ins_h] & 0xffff); | |
1042 prev[strstart & w_mask] = head[ins_h]; | |
1043 head[ins_h] = (short)strstart; | |
1044 } | |
1045 | |
1046 // Find the longest match, discarding those <= prev_length. | |
1047 // At this point we have always match_length < MIN_MATCH | |
1048 if (hash_head != 0L && | |
1049 ((strstart - hash_head) & 0xffff) <= w_size - MIN_LOOKAHEAD | |
1050 ) { | |
1051 // To simplify the code, we prevent matches with the string | |
1052 // of window index 0 (in particular we have to avoid a match | |
1053 // of the string with itself at the start of the input file). | |
1054 if (strategy != Z_HUFFMAN_ONLY) { | |
1055 match_length = longest_match(hash_head); | |
1056 } | |
1057 | |
1058 // longest_match() sets match_start | |
1059 } | |
1060 | |
1061 if (match_length >= MIN_MATCH) { | |
1062 // check_match(strstart, match_start, match_length); | |
1063 bflush = _tr_tally(strstart - match_start, match_length - MIN_MATCH); | |
1064 lookahead -= match_length; | |
1065 | |
1066 // Insert new strings in the hash table only if the match length | |
1067 // is not too large. This saves time but degrades compression. | |
1068 if (match_length <= max_lazy_match && | |
1069 lookahead >= MIN_MATCH) { | |
1070 match_length--; // string at strstart already in hash table | |
1071 | |
1072 do { | |
1073 strstart++; | |
1074 ins_h = ((ins_h << hash_shift) ^ (window[(strstart) + (MIN_MATCH - 1)] & 0xff)) & hash_mask; | |
1075 // prev[strstart&w_mask]=hash_head=head[ins_h]; | |
1076 hash_head = (head[ins_h] & 0xffff); | |
1077 prev[strstart & w_mask] = head[ins_h]; | |
1078 head[ins_h] = (short)strstart; | |
1079 // strstart never exceeds WSIZE-MAX_MATCH, so there are | |
1080 // always MIN_MATCH bytes ahead. | |
1081 } | |
1082 while (--match_length != 0); | |
1083 | |
1084 strstart++; | |
1085 } | |
1086 else { | |
1087 strstart += match_length; | |
1088 match_length = 0; | |
1089 ins_h = window[strstart] & 0xff; | |
1090 ins_h = (((ins_h) << hash_shift) ^ (window[strstart + 1] & 0xff)) & hash_mask; | |
1091 // If lookahead < MIN_MATCH, ins_h is garbage, but it does not | |
1092 // matter since it will be recomputed at next deflate call. | |
1093 } | |
1094 } | |
1095 else { | |
1096 // No match, output a literal byte | |
1097 bflush = _tr_tally(0, window[strstart] & 0xff); | |
1098 lookahead--; | |
1099 strstart++; | |
1100 } | |
1101 | |
1102 if (bflush) { | |
1103 flush_block_only(false); | |
1104 | |
1105 if (strm.avail_out == 0) return NeedMore; | |
1106 } | |
1107 } | |
1108 | |
1109 flush_block_only(flush == Z_FINISH); | |
1110 | |
1111 if (strm.avail_out == 0) { | |
1112 if (flush == Z_FINISH) return FinishStarted; | |
1113 else return NeedMore; | |
1114 } | |
1115 | |
1116 return flush == Z_FINISH ? FinishDone : BlockDone; | |
1117 } | |
1118 | |
1119 // Same as above, but achieves better compression. We use a lazy | |
1120 // evaluation for matches: a match is finally adopted only if there is | |
1121 // no better match at the next window position. | |
1122 int deflate_slow(int flush) { | |
1123 // short hash_head = 0; // head of hash chain | |
1124 int hash_head = 0; // head of hash chain | |
1125 boolean bflush; // set if current block must be flushed | |
1126 | |
1127 // Process the input block. | |
1128 while (true) { | |
1129 // Make sure that we always have enough lookahead, except | |
1130 // at the end of the input file. We need MAX_MATCH bytes | |
1131 // for the next match, plus MIN_MATCH bytes to insert the | |
1132 // string following the next match. | |
1133 if (lookahead < MIN_LOOKAHEAD) { | |
1134 fill_window(); | |
1135 | |
1136 if (lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { | |
1137 return NeedMore; | |
1138 } | |
1139 | |
1140 if (lookahead == 0) break; // flush the current block | |
1141 } | |
1142 | |
1143 // Insert the string window[strstart .. strstart+2] in the | |
1144 // dictionary, and set hash_head to the head of the hash chain: | |
1145 if (lookahead >= MIN_MATCH) { | |
1146 ins_h = (((ins_h) << hash_shift) ^ (window[(strstart) + (MIN_MATCH - 1)] & 0xff)) & hash_mask; | |
1147 // prev[strstart&w_mask]=hash_head=head[ins_h]; | |
1148 hash_head = (head[ins_h] & 0xffff); | |
1149 prev[strstart & w_mask] = head[ins_h]; | |
1150 head[ins_h] = (short)strstart; | |
1151 } | |
1152 | |
1153 // Find the longest match, discarding those <= prev_length. | |
1154 prev_length = match_length; prev_match = match_start; | |
1155 match_length = MIN_MATCH - 1; | |
1156 | |
1157 if (hash_head != 0 && prev_length < max_lazy_match && | |
1158 ((strstart - hash_head) & 0xffff) <= w_size - MIN_LOOKAHEAD | |
1159 ) { | |
1160 // To simplify the code, we prevent matches with the string | |
1161 // of window index 0 (in particular we have to avoid a match | |
1162 // of the string with itself at the start of the input file). | |
1163 if (strategy != Z_HUFFMAN_ONLY) { | |
1164 match_length = longest_match(hash_head); | |
1165 } | |
1166 | |
1167 // longest_match() sets match_start | |
1168 if (match_length <= 5 && (strategy == Z_FILTERED || | |
1169 (match_length == MIN_MATCH && | |
1170 strstart - match_start > 4096))) { | |
1171 // If prev_match is also MIN_MATCH, match_start is garbage | |
1172 // but we will ignore the current match anyway. | |
1173 match_length = MIN_MATCH - 1; | |
1174 } | |
1175 } | |
1176 | |
1177 // If there was a match at the previous step and the current | |
1178 // match is not better, output the previous match: | |
1179 if (prev_length >= MIN_MATCH && match_length <= prev_length) { | |
1180 int max_insert = strstart + lookahead - MIN_MATCH; | |
1181 // Do not insert strings in hash table beyond this. | |
1182 // check_match(strstart-1, prev_match, prev_length); | |
1183 bflush = _tr_tally(strstart - 1 - prev_match, prev_length - MIN_MATCH); | |
1184 // Insert in hash table all strings up to the end of the match. | |
1185 // strstart-1 and strstart are already inserted. If there is not | |
1186 // enough lookahead, the last two strings are not inserted in | |
1187 // the hash table. | |
1188 lookahead -= prev_length - 1; | |
1189 prev_length -= 2; | |
1190 | |
1191 do { | |
1192 if (++strstart <= max_insert) { | |
1193 ins_h = (((ins_h) << hash_shift) ^ (window[(strstart) + (MIN_MATCH - 1)] & 0xff)) & hash_mask; | |
1194 //prev[strstart&w_mask]=hash_head=head[ins_h]; | |
1195 hash_head = (head[ins_h] & 0xffff); | |
1196 prev[strstart & w_mask] = head[ins_h]; | |
1197 head[ins_h] = (short)strstart; | |
1198 } | |
1199 } | |
1200 while (--prev_length != 0); | |
1201 | |
1202 match_available = 0; | |
1203 match_length = MIN_MATCH - 1; | |
1204 strstart++; | |
1205 | |
1206 if (bflush) { | |
1207 flush_block_only(false); | |
1208 | |
1209 if (strm.avail_out == 0) return NeedMore; | |
1210 } | |
1211 } | |
1212 else if (match_available != 0) { | |
1213 // If there was no match at the previous position, output a | |
1214 // single literal. If there was a match but the current match | |
1215 // is longer, truncate the previous match to a single literal. | |
1216 bflush = _tr_tally(0, window[strstart - 1] & 0xff); | |
1217 | |
1218 if (bflush) { | |
1219 flush_block_only(false); | |
1220 } | |
1221 | |
1222 strstart++; | |
1223 lookahead--; | |
1224 | |
1225 if (strm.avail_out == 0) return NeedMore; | |
1226 } | |
1227 else { | |
1228 // There is no previous match to compare with, wait for | |
1229 // the next step to decide. | |
1230 match_available = 1; | |
1231 strstart++; | |
1232 lookahead--; | |
1233 } | |
1234 } | |
1235 | |
1236 if (match_available != 0) { | |
1237 bflush = _tr_tally(0, window[strstart - 1] & 0xff); | |
1238 match_available = 0; | |
1239 } | |
1240 | |
1241 flush_block_only(flush == Z_FINISH); | |
1242 | |
1243 if (strm.avail_out == 0) { | |
1244 if (flush == Z_FINISH) return FinishStarted; | |
1245 else return NeedMore; | |
1246 } | |
1247 | |
1248 return flush == Z_FINISH ? FinishDone : BlockDone; | |
1249 } | |
1250 | |
1251 int longest_match(int cur_match) { | |
1252 int chain_length = max_chain_length; // max hash chain length | |
1253 int scan = strstart; // current string | |
1254 int match; // matched string | |
1255 int len; // length of current match | |
1256 int best_len = prev_length; // best match length so far | |
1257 int limit = strstart > (w_size - MIN_LOOKAHEAD) ? | |
1258 strstart - (w_size - MIN_LOOKAHEAD) : 0; | |
1259 int nice_match = this.nice_match; | |
1260 // Stop when cur_match becomes <= limit. To simplify the code, | |
1261 // we prevent matches with the string of window index 0. | |
1262 int wmask = w_mask; | |
1263 int strend = strstart + MAX_MATCH; | |
1264 byte scan_end1 = window[scan + best_len - 1]; | |
1265 byte scan_end = window[scan + best_len]; | |
1266 | |
1267 // The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. | |
1268 // It is easy to get rid of this optimization if necessary. | |
1269 // Do not waste too much time if we already have a good match: | |
1270 if (prev_length >= good_match) { | |
1271 chain_length >>= 2; | |
1272 } | |
1273 | |
1274 // Do not look for matches beyond the end of the input. This is necessary | |
1275 // to make deflate deterministic. | |
1276 if (nice_match > lookahead) nice_match = lookahead; | |
1277 | |
1278 do { | |
1279 match = cur_match; | |
1280 | |
1281 // Skip to next match if the match length cannot increase | |
1282 // or if the match length is less than 2: | |
1283 if (window[match + best_len] != scan_end || | |
1284 window[match + best_len - 1] != scan_end1 || | |
1285 window[match] != window[scan] || | |
1286 window[++match] != window[scan + 1]) continue; | |
1287 | |
1288 // The check at best_len-1 can be removed because it will be made | |
1289 // again later. (This heuristic is not always a win.) | |
1290 // It is not necessary to compare scan[2] and match[2] since they | |
1291 // are always equal when the other bytes match, given that | |
1292 // the hash keys are equal and that HASH_BITS >= 8. | |
1293 scan += 2; match++; | |
1294 | |
1295 // We check for insufficient lookahead only every 8th comparison; | |
1296 // the 256th check will be made at strstart+258. | |
1297 do { | |
1298 } | |
1299 while (window[++scan] == window[++match] && | |
1300 window[++scan] == window[++match] && | |
1301 window[++scan] == window[++match] && | |
1302 window[++scan] == window[++match] && | |
1303 window[++scan] == window[++match] && | |
1304 window[++scan] == window[++match] && | |
1305 window[++scan] == window[++match] && | |
1306 window[++scan] == window[++match] && | |
1307 scan < strend); | |
1308 | |
1309 len = MAX_MATCH - (int)(strend - scan); | |
1310 scan = strend - MAX_MATCH; | |
1311 | |
1312 if (len > best_len) { | |
1313 match_start = cur_match; | |
1314 best_len = len; | |
1315 | |
1316 if (len >= nice_match) break; | |
1317 | |
1318 scan_end1 = window[scan + best_len - 1]; | |
1319 scan_end = window[scan + best_len]; | |
1320 } | |
1321 } | |
1322 while ((cur_match = (prev[cur_match & wmask] & 0xffff)) > limit | |
1323 && --chain_length != 0); | |
1324 | |
1325 if (best_len <= lookahead) return best_len; | |
1326 | |
1327 return lookahead; | |
1328 } | |
1329 | |
1330 int deflateInit(ZStream strm, int level, int bits) { | |
1331 return deflateInit2(strm, level, Z_DEFLATED, bits, DEF_MEM_LEVEL, | |
1332 Z_DEFAULT_STRATEGY); | |
1333 } | |
1334 int deflateInit(ZStream strm, int level) { | |
1335 return deflateInit(strm, level, MAX_WBITS); | |
1336 } | |
1337 int deflateInit2(ZStream strm, int level, int method, int windowBits, | |
1338 int memLevel, int strategy) { | |
1339 int noheader = 0; | |
1340 // byte[] my_version=ZLIB_VERSION; | |
1341 // | |
1342 // if (version == null || version[0] != my_version[0] | |
1343 // || stream_size != sizeof(z_stream)) { | |
1344 // return Z_VERSION_ERROR; | |
1345 // } | |
1346 strm.msg = null; | |
1347 | |
1348 if (level == Z_DEFAULT_COMPRESSION) level = 6; | |
1349 | |
1350 if (windowBits < 0) { // undocumented feature: suppress zlib header | |
1351 noheader = 1; | |
1352 windowBits = -windowBits; | |
1353 } | |
1354 | |
1355 if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || | |
1356 method != Z_DEFLATED || | |
1357 windowBits < 9 || windowBits > 15 || level < 0 || level > 9 || | |
1358 strategy < 0 || strategy > Z_HUFFMAN_ONLY) { | |
1359 return Z_STREAM_ERROR; | |
1360 } | |
1361 | |
1362 strm.dstate = (Deflate)this; | |
1363 this.noheader = noheader; | |
1364 w_bits = windowBits; | |
1365 w_size = 1 << w_bits; | |
1366 w_mask = w_size - 1; | |
1367 hash_bits = memLevel + 7; | |
1368 hash_size = 1 << hash_bits; | |
1369 hash_mask = hash_size - 1; | |
1370 hash_shift = ((hash_bits + MIN_MATCH - 1) / MIN_MATCH); | |
1371 window = new byte[w_size * 2]; | |
1372 prev = new short[w_size]; | |
1373 head = new short[hash_size]; | |
1374 lit_bufsize = 1 << (memLevel + 6); // 16K elements by default | |
1375 // We overlay pending_buf and d_buf+l_buf. This works since the average | |
1376 // output size for (length,distance) codes is <= 24 bits. | |
1377 pending_buf = new byte[lit_bufsize * 4]; | |
1378 pending_buf_size = lit_bufsize * 4; | |
1379 d_buf = lit_bufsize / 2; | |
1380 l_buf = (1 + 2) * lit_bufsize; | |
1381 this.level = level; | |
1382 //System.out.println("level="+level); | |
1383 this.strategy = strategy; | |
1384 this.method = (byte)method; | |
1385 return deflateReset(strm); | |
1386 } | |
1387 | |
1388 int deflateReset(ZStream strm) { | |
1389 strm.total_in = strm.total_out = 0; | |
1390 strm.msg = null; // | |
1391 strm.data_type = Z_UNKNOWN; | |
1392 pending = 0; | |
1393 pending_out = 0; | |
1394 | |
1395 if (noheader < 0) { | |
1396 noheader = 0; // was set to -1 by deflate(..., Z_FINISH); | |
1397 } | |
1398 | |
1399 status = (noheader != 0) ? BUSY_STATE : INIT_STATE; | |
1400 strm.adler = strm._adler.adler32(0, null, 0, 0); | |
1401 last_flush = Z_NO_FLUSH; | |
1402 tr_init(); | |
1403 lm_init(); | |
1404 return Z_OK; | |
1405 } | |
1406 | |
1407 int deflateEnd() { | |
1408 if (status != INIT_STATE && status != BUSY_STATE && status != FINISH_STATE) { | |
1409 return Z_STREAM_ERROR; | |
1410 } | |
1411 | |
1412 // Deallocate in reverse order of allocations: | |
1413 pending_buf = null; | |
1414 head = null; | |
1415 prev = null; | |
1416 window = null; | |
1417 // free | |
1418 // dstate=null; | |
1419 return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; | |
1420 } | |
1421 | |
1422 int deflateParams(ZStream strm, int _level, int _strategy) { | |
1423 int err = Z_OK; | |
1424 | |
1425 if (_level == Z_DEFAULT_COMPRESSION) { | |
1426 _level = 6; | |
1427 } | |
1428 | |
1429 if (_level < 0 || _level > 9 || | |
1430 _strategy < 0 || _strategy > Z_HUFFMAN_ONLY) { | |
1431 return Z_STREAM_ERROR; | |
1432 } | |
1433 | |
1434 if (config_table[level].func != config_table[_level].func && | |
1435 strm.total_in != 0) { | |
1436 // Flush the last buffer: | |
1437 err = strm.deflate(Z_PARTIAL_FLUSH); | |
1438 } | |
1439 | |
1440 if (level != _level) { | |
1441 level = _level; | |
1442 max_lazy_match = config_table[level].max_lazy; | |
1443 good_match = config_table[level].good_length; | |
1444 nice_match = config_table[level].nice_length; | |
1445 max_chain_length = config_table[level].max_chain; | |
1446 } | |
1447 | |
1448 strategy = _strategy; | |
1449 return err; | |
1450 } | |
1451 | |
1452 int deflateSetDictionary(ZStream strm, byte[] dictionary, int dictLength) { | |
1453 int length = dictLength; | |
1454 int index = 0; | |
1455 | |
1456 if (dictionary == null || status != INIT_STATE) | |
1457 return Z_STREAM_ERROR; | |
1458 | |
1459 strm.adler = strm._adler.adler32(strm.adler, dictionary, 0, dictLength); | |
1460 | |
1461 if (length < MIN_MATCH) return Z_OK; | |
1462 | |
1463 if (length > w_size - MIN_LOOKAHEAD) { | |
1464 length = w_size - MIN_LOOKAHEAD; | |
1465 index = dictLength - length; // use the tail of the dictionary | |
1466 } | |
1467 | |
1468 System.arraycopy(dictionary, index, window, 0, length); | |
1469 strstart = length; | |
1470 block_start = length; | |
1471 // Insert all strings in the hash table (except for the last two bytes). | |
1472 // s->lookahead stays null, so s->ins_h will be recomputed at the next | |
1473 // call of fill_window. | |
1474 ins_h = window[0] & 0xff; | |
1475 ins_h = (((ins_h) << hash_shift) ^ (window[1] & 0xff)) & hash_mask; | |
1476 | |
1477 for (int n = 0; n <= length - MIN_MATCH; n++) { | |
1478 ins_h = (((ins_h) << hash_shift) ^ (window[(n) + (MIN_MATCH - 1)] & 0xff)) & hash_mask; | |
1479 prev[n & w_mask] = head[ins_h]; | |
1480 head[ins_h] = (short)n; | |
1481 } | |
1482 | |
1483 return Z_OK; | |
1484 } | |
1485 | |
1486 int deflate(ZStream strm, int flush) { | |
1487 int old_flush; | |
1488 | |
1489 if (flush > Z_FINISH || flush < 0) { | |
1490 return Z_STREAM_ERROR; | |
1491 } | |
1492 | |
1493 if (strm.next_out == null || | |
1494 (strm.next_in == null && strm.avail_in != 0) || | |
1495 (status == FINISH_STATE && flush != Z_FINISH)) { | |
1496 strm.msg = z_errmsg[Z_NEED_DICT - (Z_STREAM_ERROR)]; | |
1497 return Z_STREAM_ERROR; | |
1498 } | |
1499 | |
1500 if (strm.avail_out == 0) { | |
1501 strm.msg = z_errmsg[Z_NEED_DICT - (Z_BUF_ERROR)]; | |
1502 return Z_BUF_ERROR; | |
1503 } | |
1504 | |
1505 this.strm = strm; // just in case | |
1506 old_flush = last_flush; | |
1507 last_flush = flush; | |
1508 | |
1509 // Write the zlib header | |
1510 if (status == INIT_STATE) { | |
1511 int header = (Z_DEFLATED + ((w_bits - 8) << 4)) << 8; | |
1512 int level_flags = ((level - 1) & 0xff) >> 1; | |
1513 | |
1514 if (level_flags > 3) level_flags = 3; | |
1515 | |
1516 header |= (level_flags << 6); | |
1517 | |
1518 if (strstart != 0) header |= PRESET_DICT; | |
1519 | |
1520 header += 31 - (header % 31); | |
1521 status = BUSY_STATE; | |
1522 putShortMSB(header); | |
1523 | |
1524 // Save the adler32 of the preset dictionary: | |
1525 if (strstart != 0) { | |
1526 putShortMSB((int)(strm.adler >>> 16)); | |
1527 putShortMSB((int)(strm.adler & 0xffff)); | |
1528 } | |
1529 | |
1530 strm.adler = strm._adler.adler32(0, null, 0, 0); | |
1531 } | |
1532 | |
1533 // Flush as much pending output as possible | |
1534 if (pending != 0) { | |
1535 strm.flush_pending(); | |
1536 | |
1537 if (strm.avail_out == 0) { | |
1538 //System.out.println(" avail_out==0"); | |
1539 // Since avail_out is 0, deflate will be called again with | |
1540 // more output space, but possibly with both pending and | |
1541 // avail_in equal to zero. There won't be anything to do, | |
1542 // but this is not an error situation so make sure we | |
1543 // return OK instead of BUF_ERROR at next call of deflate: | |
1544 last_flush = -1; | |
1545 return Z_OK; | |
1546 } | |
1547 | |
1548 // Make sure there is something to do and avoid duplicate consecutive | |
1549 // flushes. For repeated and useless calls with Z_FINISH, we keep | |
1550 // returning Z_STREAM_END instead of Z_BUFF_ERROR. | |
1551 } | |
1552 else if (strm.avail_in == 0 && flush <= old_flush && | |
1553 flush != Z_FINISH) { | |
1554 strm.msg = z_errmsg[Z_NEED_DICT - (Z_BUF_ERROR)]; | |
1555 return Z_BUF_ERROR; | |
1556 } | |
1557 | |
1558 // User must not provide more input after the first FINISH: | |
1559 if (status == FINISH_STATE && strm.avail_in != 0) { | |
1560 strm.msg = z_errmsg[Z_NEED_DICT - (Z_BUF_ERROR)]; | |
1561 return Z_BUF_ERROR; | |
1562 } | |
1563 | |
1564 // Start a new block or continue the current one. | |
1565 if (strm.avail_in != 0 || lookahead != 0 || | |
1566 (flush != Z_NO_FLUSH && status != FINISH_STATE)) { | |
1567 int bstate = -1; | |
1568 | |
1569 switch (config_table[level].func) { | |
1570 case STORED: | |
1571 bstate = deflate_stored(flush); | |
1572 break; | |
1573 | |
1574 case FAST: | |
1575 bstate = deflate_fast(flush); | |
1576 break; | |
1577 | |
1578 case SLOW: | |
1579 bstate = deflate_slow(flush); | |
1580 break; | |
1581 | |
1582 default: | |
1583 } | |
1584 | |
1585 if (bstate == FinishStarted || bstate == FinishDone) { | |
1586 status = FINISH_STATE; | |
1587 } | |
1588 | |
1589 if (bstate == NeedMore || bstate == FinishStarted) { | |
1590 if (strm.avail_out == 0) { | |
1591 last_flush = -1; // avoid BUF_ERROR next call, see above | |
1592 } | |
1593 | |
1594 return Z_OK; | |
1595 // If flush != Z_NO_FLUSH && avail_out == 0, the next call | |
1596 // of deflate should use the same flush parameter to make sure | |
1597 // that the flush is complete. So we don't have to output an | |
1598 // empty block here, this will be done at next call. This also | |
1599 // ensures that for a very small output buffer, we emit at most | |
1600 // one empty block. | |
1601 } | |
1602 | |
1603 if (bstate == BlockDone) { | |
1604 if (flush == Z_PARTIAL_FLUSH) { | |
1605 _tr_align(); | |
1606 } | |
1607 else { // FULL_FLUSH or SYNC_FLUSH | |
1608 _tr_stored_block(0, 0, false); | |
1609 | |
1610 // For a full flush, this empty block will be recognized | |
1611 // as a special marker by inflate_sync(). | |
1612 if (flush == Z_FULL_FLUSH) { | |
1613 //state.head[s.hash_size-1]=0; | |
1614 for (int i = 0; i < hash_size/*-1*/; i++) // forget history | |
1615 head[i] = 0; | |
1616 } | |
1617 } | |
1618 | |
1619 strm.flush_pending(); | |
1620 | |
1621 if (strm.avail_out == 0) { | |
1622 last_flush = -1; // avoid BUF_ERROR at next call, see above | |
1623 return Z_OK; | |
1624 } | |
1625 } | |
1626 } | |
1627 | |
1628 if (flush != Z_FINISH) return Z_OK; | |
1629 | |
1630 if (noheader != 0) return Z_STREAM_END; | |
1631 | |
1632 // Write the zlib trailer (adler32) | |
1633 putShortMSB((int)(strm.adler >>> 16)); | |
1634 putShortMSB((int)(strm.adler & 0xffff)); | |
1635 strm.flush_pending(); | |
1636 // If avail_out is zero, the application will call deflate again | |
1637 // to flush the rest. | |
1638 noheader = -1; // write the trailer only once! | |
1639 return pending != 0 ? Z_OK : Z_STREAM_END; | |
1640 } | |
1641 } |