Source file src/pkg/compress/flate/deflate.go
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package flate
6
7 import (
8 "fmt"
9 "io"
10 "math"
11 )
12
13 const (
14 NoCompression = 0
15 BestSpeed = 1
16 fastCompression = 3
17 BestCompression = 9
18 DefaultCompression = -1
19 logWindowSize = 15
20 windowSize = 1 << logWindowSize
21 windowMask = windowSize - 1
22 logMaxOffsetSize = 15 // Standard DEFLATE
23 minMatchLength = 3 // The smallest match that the compressor looks for
24 maxMatchLength = 258 // The longest match for the compressor
25 minOffsetSize = 1 // The shortest offset that makes any sense
26
27 // The maximum number of tokens we put into a single flate block, just to
28 // stop things from getting too large.
29 maxFlateBlockTokens = 1 << 14
30 maxStoreBlockSize = 65535
31 hashBits = 17
32 hashSize = 1 << hashBits
33 hashMask = (1 << hashBits) - 1
34 hashShift = (hashBits + minMatchLength - 1) / minMatchLength
35
36 skipNever = math.MaxInt32
37 )
38
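// The hash used by the compressor is a simple shift-and-add over minMatchLength
// bytes. The two steps below are the seed and rolling update that deflate uses
// (reproduced here only to document the constants; see deflate further down for
// the real code):
//
//     d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1])
//     d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
//
// hashShift is chosen so that minMatchLength updates shift a byte by at least
// hashBits positions (3*6 = 18 >= 17), so after masking each hash value depends
// only on the most recent minMatchLength bytes.
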
39 type compressionLevel struct {
40 good, lazy, nice, chain, fastSkipHashing int
41 }
42
43 var levels = []compressionLevel{
44 {}, // 0
45 // For levels 1-3 we don't bother trying with lazy matches
46 {3, 0, 8, 4, 4},
47 {3, 0, 16, 8, 5},
48 {3, 0, 32, 32, 6},
49 // Levels 4-9 use increasingly more lazy matching
50 // and increasingly stringent conditions for "good enough".
51 {4, 4, 16, 16, skipNever},
52 {8, 16, 32, 32, skipNever},
53 {8, 16, 128, 128, skipNever},
54 {8, 32, 128, 256, skipNever},
55 {32, 128, 258, 1024, skipNever},
56 {32, 258, 258, 4096, skipNever},
57 }
58
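// Each row above fills the compressionLevel fields in declaration order, so
// level 6 (the default) is equivalent to this keyed literal (shown only for
// readability; not part of the original source):
//
//     compressionLevel{good: 8, lazy: 16, nice: 128, chain: 128, fastSkipHashing: skipNever}
//
// Roughly: once a match of at least good bytes is in hand the chain search is
// cut to a quarter, matches shorter than lazy may be deferred in favor of the
// next position, a match of nice bytes ends the search immediately, at most
// chain hash-chain links are followed, and fastSkipHashing (levels 1-3 only) is
// the match length beyond which positions inside a match are not inserted into
// the hash table. See findMatch and deflate below.
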
59 type compressor struct {
60 compressionLevel
61
62 w *huffmanBitWriter
63
64 // compression algorithm
65 fill func(*compressor, []byte) int // copy data to window
66 step func(*compressor) // process window
67 sync bool // requesting flush
68
69 // Input hash chains
70 // hashHead[hashValue] contains the largest inputIndex with the specified hash value
71 // If hashHead[hashValue] is within the current window, then
72 // hashPrev[hashHead[hashValue] & windowMask] contains the previous index
73 // with the same hash value.
74 chainHead int
75 hashHead []int
76 hashPrev []int
77 hashOffset int
78
79 // input window: unprocessed data is window[index:windowEnd]
80 index int
81 window []byte
82 windowEnd int
83 blockStart int // window index where current tokens start
84 byteAvailable bool // if true, still need to process window[index-1].
85
86 // queued output tokens
87 tokens []token
88
89 // deflate state
90 length int
91 offset int
92 hash int
93 maxInsertIndex int
94 err error
95 }
96
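// To illustrate the hash chain fields above: for a given hash value, candidate
// match positions are visited newest-first by starting at hashHead[hash] and
// following hashPrev, subtracting hashOffset to turn stored values back into
// window indexes. A simplified sketch of that walk, where hash and pos stand
// for the current hash value and window position (the real loop, in findMatch
// below, also caps the number of links followed and stops before reading a
// hashPrev entry that may already have been overwritten):
//
//     for i := d.hashHead[hash] - d.hashOffset; i >= 0 && i >= pos-windowSize; {
//         // ... compare d.window[i:] against d.window[pos:] ...
//         i = d.hashPrev[i&windowMask] - d.hashOffset
//     }
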
97 func (d *compressor) fillDeflate(b []byte) int {
98 if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
99 // shift the window by windowSize
100 copy(d.window, d.window[windowSize:2*windowSize])
101 d.index -= windowSize
102 d.windowEnd -= windowSize
103 if d.blockStart >= windowSize {
104 d.blockStart -= windowSize
105 } else {
106 d.blockStart = math.MaxInt32
107 }
108 d.hashOffset += windowSize
109 }
110 n := copy(d.window[d.windowEnd:], b)
111 d.windowEnd += n
112 return n
113 }
114
115 func (d *compressor) writeBlock(tokens []token, index int, eof bool) error {
116 if index > 0 || eof {
117 var window []byte
118 if d.blockStart <= index {
119 window = d.window[d.blockStart:index]
120 }
121 d.blockStart = index
122 d.w.writeBlock(tokens, eof, window)
123 return d.w.err
124 }
125 return nil
126 }
127
128 // Try to find a match starting at pos whose length is greater than prevLength.
129 // We only look at d.chain possibilities before giving up.
130 func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
131 minMatchLook := maxMatchLength
132 if lookahead < minMatchLook {
133 minMatchLook = lookahead
134 }
135
136 win := d.window[0 : pos+minMatchLook]
137
138 // We quit when we get a match that's at least nice bytes long
139 nice := len(win) - pos
140 if d.nice < nice {
141 nice = d.nice
142 }
143
144 // If we've got a match that's good enough, only look in 1/4 of the chain.
145 tries := d.chain
146 length = prevLength
147 if length >= d.good {
148 tries >>= 2
149 }
150
151 w0 := win[pos]
152 w1 := win[pos+1]
153 wEnd := win[pos+length]
154 minIndex := pos - windowSize
155
156 for i := prevHead; tries > 0; tries-- {
157 if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] {
158 // The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches
159
160 n := 3
161 for pos+n < len(win) && win[i+n] == win[pos+n] {
162 n++
163 }
164 if n > length && (n > 3 || pos-i <= 4096) {
165 length = n
166 offset = pos - i
167 ok = true
168 if n >= nice {
169 // The match is good enough that we don't try to find a better one.
170 break
171 }
172 wEnd = win[pos+n]
173 }
174 }
175 if i == minIndex {
176 // hashPrev[i & windowMask] has already been overwritten, so stop now.
177 break
178 }
179 if i = d.hashPrev[i&windowMask] - d.hashOffset; i < minIndex || i < 0 {
180 break
181 }
182 }
183 return
184 }
185
186 func (d *compressor) writeStoredBlock(buf []byte) error {
187 if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
188 return d.w.err
189 }
190 d.w.writeBytes(buf)
191 return d.w.err
192 }
193
194 func (d *compressor) initDeflate() {
195 d.hashHead = make([]int, hashSize)
196 d.hashPrev = make([]int, windowSize)
197 d.window = make([]byte, 2*windowSize)
198 d.hashOffset = 1
199 d.tokens = make([]token, 0, maxFlateBlockTokens+1)
200 d.length = minMatchLength - 1
201 d.offset = 0
202 d.byteAvailable = false
203 d.index = 0
204 d.hash = 0
205 d.chainHead = -1
206 }
207
208 func (d *compressor) deflate() {
209 if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
210 return
211 }
212
213 d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
214 if d.index < d.maxInsertIndex {
215 d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1])
216 }
217
218 Loop:
219 for {
220 if d.index > d.windowEnd {
221 panic("index > windowEnd")
222 }
223 lookahead := d.windowEnd - d.index
224 if lookahead < minMatchLength+maxMatchLength {
225 if !d.sync {
226 break Loop
227 }
228 if d.index > d.windowEnd {
229 panic("index > windowEnd")
230 }
231 if lookahead == 0 {
232 // Flush current output block if any.
233 if d.byteAvailable {
234 // There is still one pending token that needs to be flushed
235 d.tokens = append(d.tokens, literalToken(uint32(d.window[d.index-1])))
236 d.byteAvailable = false
237 }
238 if len(d.tokens) > 0 {
239 if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
240 return
241 }
242 d.tokens = d.tokens[:0]
243 }
244 break Loop
245 }
246 }
247 if d.index < d.maxInsertIndex {
248 // Update the hash
249 d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
250 d.chainHead = d.hashHead[d.hash]
251 d.hashPrev[d.index&windowMask] = d.chainHead
252 d.hashHead[d.hash] = d.index + d.hashOffset
253 }
254 prevLength := d.length
255 prevOffset := d.offset
256 d.length = minMatchLength - 1
257 d.offset = 0
258 minIndex := d.index - windowSize
259 if minIndex < 0 {
260 minIndex = 0
261 }
262
263 if d.chainHead-d.hashOffset >= minIndex &&
264 (d.fastSkipHashing != skipNever && lookahead > minMatchLength-1 ||
265 d.fastSkipHashing == skipNever && lookahead > prevLength && prevLength < d.lazy) {
266 if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
267 d.length = newLength
268 d.offset = newOffset
269 }
270 }
271 if d.fastSkipHashing != skipNever && d.length >= minMatchLength ||
272 d.fastSkipHashing == skipNever && prevLength >= minMatchLength && d.length <= prevLength {
273 // There was a match at the previous step, and the current match is
274 // not better. Output the previous match.
275 if d.fastSkipHashing != skipNever {
276 d.tokens = append(d.tokens, matchToken(uint32(d.length-minMatchLength), uint32(d.offset-minOffsetSize)))
277 } else {
278 d.tokens = append(d.tokens, matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize)))
279 }
280 // Insert in the hash table all strings up to the end of the match.
281 // index and index-1 are already inserted. If there is not enough
282 // lookahead, the last two strings are not inserted into the hash
283 // table.
284 if d.length <= d.fastSkipHashing {
285 var newIndex int
286 if d.fastSkipHashing != skipNever {
287 newIndex = d.index + d.length
288 } else {
289 newIndex = d.index + prevLength - 1
290 }
291 for d.index++; d.index < newIndex; d.index++ {
292 if d.index < d.maxInsertIndex {
293 d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
294 // Get previous value with the same hash.
295 // Our chain should point to the previous value.
296 d.hashPrev[d.index&windowMask] = d.hashHead[d.hash]
297 // Set the head of the hash chain to us.
298 d.hashHead[d.hash] = d.index + d.hashOffset
299 }
300 }
301 if d.fastSkipHashing == skipNever {
302 d.byteAvailable = false
303 d.length = minMatchLength - 1
304 }
305 } else {
306 // For matches this long, we don't bother inserting each individual
307 // item into the table.
308 d.index += d.length
309 if d.index < d.maxInsertIndex {
310 d.hash = (int(d.window[d.index])<<hashShift + int(d.window[d.index+1]))
311 }
312 }
313 if len(d.tokens) == maxFlateBlockTokens {
314 // The block includes the current character
315 if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
316 return
317 }
318 d.tokens = d.tokens[:0]
319 }
320 } else {
321 if d.fastSkipHashing != skipNever || d.byteAvailable {
322 i := d.index - 1
323 if d.fastSkipHashing != skipNever {
324 i = d.index
325 }
326 d.tokens = append(d.tokens, literalToken(uint32(d.window[i])))
327 if len(d.tokens) == maxFlateBlockTokens {
328 if d.err = d.writeBlock(d.tokens, i+1, false); d.err != nil {
329 return
330 }
331 d.tokens = d.tokens[:0]
332 }
333 }
334 d.index++
335 if d.fastSkipHashing == skipNever {
336 d.byteAvailable = true
337 }
338 }
339 }
340 }
341
342 func (d *compressor) fillStore(b []byte) int {
343 n := copy(d.window[d.windowEnd:], b)
344 d.windowEnd += n
345 return n
346 }
347
348 func (d *compressor) store() {
349 if d.windowEnd > 0 {
350 d.err = d.writeStoredBlock(d.window[:d.windowEnd])
351 }
352 d.windowEnd = 0
353 }
354
355 func (d *compressor) write(b []byte) (n int, err error) {
356 n = len(b)
357 b = b[d.fill(d, b):]
358 for len(b) > 0 {
359 d.step(d)
360 b = b[d.fill(d, b):]
361 }
362 return n, d.err
363 }
364
365 func (d *compressor) syncFlush() error {
366 d.sync = true
367 d.step(d)
368 if d.err == nil {
369 d.w.writeStoredHeader(0, false)
370 d.w.flush()
371 d.err = d.w.err
372 }
373 d.sync = false
374 return d.err
375 }
376
377 func (d *compressor) init(w io.Writer, level int) (err error) {
378 d.w = newHuffmanBitWriter(w)
379
380 switch {
381 case level == NoCompression:
382 d.window = make([]byte, maxStoreBlockSize)
383 d.fill = (*compressor).fillStore
384 d.step = (*compressor).store
385 case level == DefaultCompression:
386 level = 6
387 fallthrough
388 case 1 <= level && level <= 9:
389 d.compressionLevel = levels[level]
390 d.initDeflate()
391 d.fill = (*compressor).fillDeflate
392 d.step = (*compressor).deflate
393 default:
394 return fmt.Errorf("flate: invalid compression level %d: want value in range [-1, 9]", level)
395 }
396 return nil
397 }
398
399 func (d *compressor) close() error {
400 d.sync = true
401 d.step(d)
402 if d.err != nil {
403 return d.err
404 }
405 if d.w.writeStoredHeader(0, true); d.w.err != nil {
406 return d.w.err
407 }
408 d.w.flush()
409 return d.w.err
410 }
411
412 // NewWriter returns a new Writer compressing data at the given level.
413 // Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression);
414 // higher levels typically run slower but compress more. Level 0
415 // (NoCompression) does not attempt any compression; it only adds the
416 // necessary DEFLATE framing. Level -1 (DefaultCompression) uses the default
417 // compression level.
418 //
419 // If level is in the range [-1, 9] then the error returned will be nil.
420 // Otherwise the error returned will be non-nil.
421 func NewWriter(w io.Writer, level int) (*Writer, error) {
422 const logWindowSize = logMaxOffsetSize
423 var dw Writer
424 if err := dw.d.init(w, level); err != nil {
425 return nil, err
426 }
427 return &dw, nil
428 }
429
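// A minimal usage sketch for NewWriter (not part of the original source; it
// assumes a caller outside this package using the standard bytes and log packages):
//
//     var buf bytes.Buffer
//     zw, err := flate.NewWriter(&buf, flate.BestCompression)
//     if err != nil {
//         log.Fatal(err)
//     }
//     zw.Write([]byte("hello, world"))
//     zw.Close()
//     // buf now holds a raw DEFLATE stream; flate.NewReader(&buf) can decode it.
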
430 // NewWriterDict is like NewWriter but initializes the new
431 // Writer with a preset dictionary. The returned Writer behaves
432 // as if the dictionary had been written to it without producing
433 // any compressed output. The compressed data written to w
434 // can only be decompressed by a Reader initialized with the
435 // same dictionary.
436 func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
437 dw := &dictWriter{w, false}
438 zw, err := NewWriter(dw, level)
439 if err != nil {
440 return nil, err
441 }
442 zw.Write(dict)
443 zw.Flush()
444 dw.enabled = true
445 return zw, err
446 }
447
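// A sketch of the dictionary round trip described above (not part of the
// original source; assumes the bytes, io, log, and os packages). The reading
// side must supply the same dictionary through NewReaderDict, or the output
// cannot be reconstructed:
//
//     dict := []byte("hello, world\n")
//     var buf bytes.Buffer
//     zw, err := flate.NewWriterDict(&buf, flate.DefaultCompression, dict)
//     if err != nil {
//         log.Fatal(err)
//     }
//     zw.Write([]byte("hello, world\nhello, again\n"))
//     zw.Close()
//     zr := flate.NewReaderDict(&buf, dict) // a plain NewReader could not reproduce the input
//     io.Copy(os.Stdout, zr)                // prints the data written after the dictionary
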
448 type dictWriter struct {
449 w io.Writer
450 enabled bool
451 }
452
453 func (w *dictWriter) Write(b []byte) (n int, err error) {
454 if w.enabled {
455 return w.w.Write(b)
456 }
457 return len(b), nil
458 }
459
460 // A Writer takes data written to it and writes the compressed
461 // form of that data to an underlying writer (see NewWriter).
462 type Writer struct {
463 d compressor
464 }
465
466 // Write writes data to w, which will eventually write the
467 // compressed form of data to its underlying writer.
468 func (w *Writer) Write(data []byte) (n int, err error) {
469 return w.d.write(data)
470 }
471
472 // Flush flushes any pending compressed data to the underlying writer.
473 // It is useful mainly in compressed network protocols, to ensure that
474 // a remote reader has enough data to reconstruct a packet.
475 // Flush does not return until the data has been written.
476 // If the underlying writer returns an error, Flush returns that error.
477 //
478 // In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
479 func (w *Writer) Flush() error {
480 // For more about flushing:
481 // http://www.bolet.org/~pornin/deflate-flush.html
482 return w.d.syncFlush()
483 }
484
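// A sketch of the packet-style use Flush is intended for (conn and packet are
// hypothetical; not part of the original source). After Flush returns without
// an error, the remote reader can decode everything written so far even though
// the stream remains open:
//
//     zw, _ := flate.NewWriter(conn, flate.BestSpeed)
//     zw.Write(packet)                   // one protocol message
//     if err := zw.Flush(); err != nil { // Z_SYNC_FLUSH: the peer can now decode the message
//         log.Fatal(err)
//     }
//     // ... further messages, then zw.Close() when the stream ends.
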
485 // Close flushes and closes the writer.
486 func (w *Writer) Close() error {
487 return w.d.close()
488 }