Source file src/pkg/compress/flate/deflate.go
1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package flate 6 7 import ( 8 "fmt" 9 "io" 10 "math" 11 ) 12 13 const ( 14 NoCompression = 0 15 BestSpeed = 1 16 fastCompression = 3 17 BestCompression = 9 18 DefaultCompression = -1 19 logWindowSize = 15 20 windowSize = 1 << logWindowSize 21 windowMask = windowSize - 1 22 logMaxOffsetSize = 15 // Standard DEFLATE 23 minMatchLength = 3 // The smallest match that the compressor looks for 24 maxMatchLength = 258 // The longest match for the compressor 25 minOffsetSize = 1 // The shortest offset that makes any sence 26 27 // The maximum number of tokens we put into a single flat block, just too 28 // stop things from getting too large. 29 maxFlateBlockTokens = 1 << 14 30 maxStoreBlockSize = 65535 31 hashBits = 17 32 hashSize = 1 << hashBits 33 hashMask = (1 << hashBits) - 1 34 hashShift = (hashBits + minMatchLength - 1) / minMatchLength 35 36 skipNever = math.MaxInt32 37 ) 38 39 type compressionLevel struct { 40 good, lazy, nice, chain, fastSkipHashing int 41 } 42 43 var levels = []compressionLevel{ 44 {}, // 0 45 // For levels 1-3 we don't bother trying with lazy matches 46 {3, 0, 8, 4, 4}, 47 {3, 0, 16, 8, 5}, 48 {3, 0, 32, 32, 6}, 49 // Levels 4-9 use increasingly more lazy matching 50 // and increasingly stringent conditions for "good enough". 51 {4, 4, 16, 16, skipNever}, 52 {8, 16, 32, 32, skipNever}, 53 {8, 16, 128, 128, skipNever}, 54 {8, 32, 128, 256, skipNever}, 55 {32, 128, 258, 1024, skipNever}, 56 {32, 258, 258, 4096, skipNever}, 57 } 58 59 type compressor struct { 60 compressionLevel 61 62 w *huffmanBitWriter 63 64 // compression algorithm 65 fill func(*compressor, []byte) int // copy data to window 66 step func(*compressor) // process window 67 sync bool // requesting flush 68 69 // Input hash chains 70 // hashHead[hashValue] contains the largest inputIndex with the specified hash value 71 // If hashHead[hashValue] is within the current window, then 72 // hashPrev[hashHead[hashValue] & windowMask] contains the previous index 73 // with the same hash value. 74 chainHead int 75 hashHead []int 76 hashPrev []int 77 hashOffset int 78 79 // input window: unprocessed data is window[index:windowEnd] 80 index int 81 window []byte 82 windowEnd int 83 blockStart int // window index where current tokens start 84 byteAvailable bool // if true, still need to process window[index-1]. 85 86 // queued output tokens 87 tokens []token 88 89 // deflate state 90 length int 91 offset int 92 hash int 93 maxInsertIndex int 94 err error 95 } 96 97 func (d *compressor) fillDeflate(b []byte) int { 98 if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) { 99 // shift the window by windowSize 100 copy(d.window, d.window[windowSize:2*windowSize]) 101 d.index -= windowSize 102 d.windowEnd -= windowSize 103 if d.blockStart >= windowSize { 104 d.blockStart -= windowSize 105 } else { 106 d.blockStart = math.MaxInt32 107 } 108 d.hashOffset += windowSize 109 } 110 n := copy(d.window[d.windowEnd:], b) 111 d.windowEnd += n 112 return n 113 } 114 115 func (d *compressor) writeBlock(tokens []token, index int, eof bool) error { 116 if index > 0 || eof { 117 var window []byte 118 if d.blockStart <= index { 119 window = d.window[d.blockStart:index] 120 } 121 d.blockStart = index 122 d.w.writeBlock(tokens, eof, window) 123 return d.w.err 124 } 125 return nil 126 } 127 128 // Try to find a match starting at index whose length is greater than prevSize. 129 // We only look at chainCount possibilities before giving up. 130 func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) { 131 minMatchLook := maxMatchLength 132 if lookahead < minMatchLook { 133 minMatchLook = lookahead 134 } 135 136 win := d.window[0 : pos+minMatchLook] 137 138 // We quit when we get a match that's at least nice long 139 nice := len(win) - pos 140 if d.nice < nice { 141 nice = d.nice 142 } 143 144 // If we've got a match that's good enough, only look in 1/4 the chain. 145 tries := d.chain 146 length = prevLength 147 if length >= d.good { 148 tries >>= 2 149 } 150 151 w0 := win[pos] 152 w1 := win[pos+1] 153 wEnd := win[pos+length] 154 minIndex := pos - windowSize 155 156 for i := prevHead; tries > 0; tries-- { 157 if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] { 158 // The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches 159 160 n := 3 161 for pos+n < len(win) && win[i+n] == win[pos+n] { 162 n++ 163 } 164 if n > length && (n > 3 || pos-i <= 4096) { 165 length = n 166 offset = pos - i 167 ok = true 168 if n >= nice { 169 // The match is good enough that we don't try to find a better one. 170 break 171 } 172 wEnd = win[pos+n] 173 } 174 } 175 if i == minIndex { 176 // hashPrev[i & windowMask] has already been overwritten, so stop now. 177 break 178 } 179 if i = d.hashPrev[i&windowMask] - d.hashOffset; i < minIndex || i < 0 { 180 break 181 } 182 } 183 return 184 } 185 186 func (d *compressor) writeStoredBlock(buf []byte) error { 187 if d.w.writeStoredHeader(len(buf), false); d.w.err != nil { 188 return d.w.err 189 } 190 d.w.writeBytes(buf) 191 return d.w.err 192 } 193 194 func (d *compressor) initDeflate() { 195 d.hashHead = make([]int, hashSize) 196 d.hashPrev = make([]int, windowSize) 197 d.window = make([]byte, 2*windowSize) 198 d.hashOffset = 1 199 d.tokens = make([]token, 0, maxFlateBlockTokens+1) 200 d.length = minMatchLength - 1 201 d.offset = 0 202 d.byteAvailable = false 203 d.index = 0 204 d.hash = 0 205 d.chainHead = -1 206 } 207 208 func (d *compressor) deflate() { 209 if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync { 210 return 211 } 212 213 d.maxInsertIndex = d.windowEnd - (minMatchLength - 1) 214 if d.index < d.maxInsertIndex { 215 d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1]) 216 } 217 218 Loop: 219 for { 220 if d.index > d.windowEnd { 221 panic("index > windowEnd") 222 } 223 lookahead := d.windowEnd - d.index 224 if lookahead < minMatchLength+maxMatchLength { 225 if !d.sync { 226 break Loop 227 } 228 if d.index > d.windowEnd { 229 panic("index > windowEnd") 230 } 231 if lookahead == 0 { 232 // Flush current output block if any. 233 if d.byteAvailable { 234 // There is still one pending token that needs to be flushed 235 d.tokens = append(d.tokens, literalToken(uint32(d.window[d.index-1]))) 236 d.byteAvailable = false 237 } 238 if len(d.tokens) > 0 { 239 if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { 240 return 241 } 242 d.tokens = d.tokens[:0] 243 } 244 break Loop 245 } 246 } 247 if d.index < d.maxInsertIndex { 248 // Update the hash 249 d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask 250 d.chainHead = d.hashHead[d.hash] 251 d.hashPrev[d.index&windowMask] = d.chainHead 252 d.hashHead[d.hash] = d.index + d.hashOffset 253 } 254 prevLength := d.length 255 prevOffset := d.offset 256 d.length = minMatchLength - 1 257 d.offset = 0 258 minIndex := d.index - windowSize 259 if minIndex < 0 { 260 minIndex = 0 261 } 262 263 if d.chainHead-d.hashOffset >= minIndex && 264 (d.fastSkipHashing != skipNever && lookahead > minMatchLength-1 || 265 d.fastSkipHashing == skipNever && lookahead > prevLength && prevLength < d.lazy) { 266 if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok { 267 d.length = newLength 268 d.offset = newOffset 269 } 270 } 271 if d.fastSkipHashing != skipNever && d.length >= minMatchLength || 272 d.fastSkipHashing == skipNever && prevLength >= minMatchLength && d.length <= prevLength { 273 // There was a match at the previous step, and the current match is 274 // not better. Output the previous match. 275 if d.fastSkipHashing != skipNever { 276 d.tokens = append(d.tokens, matchToken(uint32(d.length-minMatchLength), uint32(d.offset-minOffsetSize))) 277 } else { 278 d.tokens = append(d.tokens, matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize))) 279 } 280 // Insert in the hash table all strings up to the end of the match. 281 // index and index-1 are already inserted. If there is not enough 282 // lookahead, the last two strings are not inserted into the hash 283 // table. 284 if d.length <= d.fastSkipHashing { 285 var newIndex int 286 if d.fastSkipHashing != skipNever { 287 newIndex = d.index + d.length 288 } else { 289 newIndex = d.index + prevLength - 1 290 } 291 for d.index++; d.index < newIndex; d.index++ { 292 if d.index < d.maxInsertIndex { 293 d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask 294 // Get previous value with the same hash. 295 // Our chain should point to the previous value. 296 d.hashPrev[d.index&windowMask] = d.hashHead[d.hash] 297 // Set the head of the hash chain to us. 298 d.hashHead[d.hash] = d.index + d.hashOffset 299 } 300 } 301 if d.fastSkipHashing == skipNever { 302 d.byteAvailable = false 303 d.length = minMatchLength - 1 304 } 305 } else { 306 // For matches this long, we don't bother inserting each individual 307 // item into the table. 308 d.index += d.length 309 if d.index < d.maxInsertIndex { 310 d.hash = (int(d.window[d.index])<<hashShift + int(d.window[d.index+1])) 311 } 312 } 313 if len(d.tokens) == maxFlateBlockTokens { 314 // The block includes the current character 315 if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { 316 return 317 } 318 d.tokens = d.tokens[:0] 319 } 320 } else { 321 if d.fastSkipHashing != skipNever || d.byteAvailable { 322 i := d.index - 1 323 if d.fastSkipHashing != skipNever { 324 i = d.index 325 } 326 d.tokens = append(d.tokens, literalToken(uint32(d.window[i]))) 327 if len(d.tokens) == maxFlateBlockTokens { 328 if d.err = d.writeBlock(d.tokens, i+1, false); d.err != nil { 329 return 330 } 331 d.tokens = d.tokens[:0] 332 } 333 } 334 d.index++ 335 if d.fastSkipHashing == skipNever { 336 d.byteAvailable = true 337 } 338 } 339 } 340 } 341 342 func (d *compressor) fillStore(b []byte) int { 343 n := copy(d.window[d.windowEnd:], b) 344 d.windowEnd += n 345 return n 346 } 347 348 func (d *compressor) store() { 349 if d.windowEnd > 0 { 350 d.err = d.writeStoredBlock(d.window[:d.windowEnd]) 351 } 352 d.windowEnd = 0 353 } 354 355 func (d *compressor) write(b []byte) (n int, err error) { 356 n = len(b) 357 b = b[d.fill(d, b):] 358 for len(b) > 0 { 359 d.step(d) 360 b = b[d.fill(d, b):] 361 } 362 return n, d.err 363 } 364 365 func (d *compressor) syncFlush() error { 366 d.sync = true 367 d.step(d) 368 if d.err == nil { 369 d.w.writeStoredHeader(0, false) 370 d.w.flush() 371 d.err = d.w.err 372 } 373 d.sync = false 374 return d.err 375 } 376 377 func (d *compressor) init(w io.Writer, level int) (err error) { 378 d.w = newHuffmanBitWriter(w) 379 380 switch { 381 case level == NoCompression: 382 d.window = make([]byte, maxStoreBlockSize) 383 d.fill = (*compressor).fillStore 384 d.step = (*compressor).store 385 case level == DefaultCompression: 386 level = 6 387 fallthrough 388 case 1 <= level && level <= 9: 389 d.compressionLevel = levels[level] 390 d.initDeflate() 391 d.fill = (*compressor).fillDeflate 392 d.step = (*compressor).deflate 393 default: 394 return fmt.Errorf("flate: invalid compression level %d: want value in range [-1, 9]", level) 395 } 396 return nil 397 } 398 399 func (d *compressor) close() error { 400 d.sync = true 401 d.step(d) 402 if d.err != nil { 403 return d.err 404 } 405 if d.w.writeStoredHeader(0, true); d.w.err != nil { 406 return d.w.err 407 } 408 d.w.flush() 409 return d.w.err 410 } 411 412 // NewWriter returns a new Writer compressing data at the given level. 413 // Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression); 414 // higher levels typically run slower but compress more. Level 0 415 // (NoCompression) does not attempt any compression; it only adds the 416 // necessary DEFLATE framing. Level -1 (DefaultCompression) uses the default 417 // compression level. 418 // 419 // If level is in the range [-1, 9] then the error returned will be nil. 420 // Otherwise the error returned will be non-nil. 421 func NewWriter(w io.Writer, level int) (*Writer, error) { 422 const logWindowSize = logMaxOffsetSize 423 var dw Writer 424 if err := dw.d.init(w, level); err != nil { 425 return nil, err 426 } 427 return &dw, nil 428 } 429 430 // NewWriterDict is like NewWriter but initializes the new 431 // Writer with a preset dictionary. The returned Writer behaves 432 // as if the dictionary had been written to it without producing 433 // any compressed output. The compressed data written to w 434 // can only be decompressed by a Reader initialized with the 435 // same dictionary. 436 func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { 437 dw := &dictWriter{w, false} 438 zw, err := NewWriter(dw, level) 439 if err != nil { 440 return nil, err 441 } 442 zw.Write(dict) 443 zw.Flush() 444 dw.enabled = true 445 return zw, err 446 } 447 448 type dictWriter struct { 449 w io.Writer 450 enabled bool 451 } 452 453 func (w *dictWriter) Write(b []byte) (n int, err error) { 454 if w.enabled { 455 return w.w.Write(b) 456 } 457 return len(b), nil 458 } 459 460 // A Writer takes data written to it and writes the compressed 461 // form of that data to an underlying writer (see NewWriter). 462 type Writer struct { 463 d compressor 464 } 465 466 // Write writes data to w, which will eventually write the 467 // compressed form of data to its underlying writer. 468 func (w *Writer) Write(data []byte) (n int, err error) { 469 return w.d.write(data) 470 } 471 472 // Flush flushes any pending compressed data to the underlying writer. 473 // It is useful mainly in compressed network protocols, to ensure that 474 // a remote reader has enough data to reconstruct a packet. 475 // Flush does not return until the data has been written. 476 // If the underlying writer returns an error, Flush returns that error. 477 // 478 // In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. 479 func (w *Writer) Flush() error { 480 // For more about flushing: 481 // http://www.bolet.org/~pornin/deflate-flush.html 482 return w.d.syncFlush() 483 } 484 485 // Close flushes and closes the writer. 486 func (w *Writer) Close() error { 487 return w.d.close() 488 }