src/pkg/compress/flate/huffman_code.go - The Go Programming Language

Golang

Source file src/pkg/compress/flate/huffman_code.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	package flate
     6	
     7	import (
     8		"math"
     9		"sort"
    10	)
    11	
    12	type huffmanEncoder struct {
    13		codeBits []uint8
    14		code     []uint16
    15	}
    16	
    17	type literalNode struct {
    18		literal uint16
    19		freq    int32
    20	}
    21	
    22	type chain struct {
    23		// The sum of the leaves in this tree
    24		freq int32
    25	
    26		// The number of literals to the left of this item at this level
    27		leafCount int32
    28	
    29		// The right child of this chain in the previous level.
    30		up *chain
    31	}
    32	
    33	type levelInfo struct {
    34		// Our level.  for better printing
    35		level int32
    36	
    37		// The most recent chain generated for this level
    38		lastChain *chain
    39	
    40		// The frequency of the next character to add to this level
    41		nextCharFreq int32
    42	
    43		// The frequency of the next pair (from level below) to add to this level.
    44		// Only valid if the "needed" value of the next lower level is 0.
    45		nextPairFreq int32
    46	
    47		// The number of chains remaining to generate for this level before moving
    48		// up to the next level
    49		needed int32
    50	
    51		// The levelInfo for level+1
    52		up *levelInfo
    53	
    54		// The levelInfo for level-1
    55		down *levelInfo
    56	}
    57	
    58	func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
    59	
    60	func newHuffmanEncoder(size int) *huffmanEncoder {
    61		return &huffmanEncoder{make([]uint8, size), make([]uint16, size)}
    62	}
    63	
    64	// Generates a HuffmanCode corresponding to the fixed literal table
    65	func generateFixedLiteralEncoding() *huffmanEncoder {
    66		h := newHuffmanEncoder(maxLit)
    67		codeBits := h.codeBits
    68		code := h.code
    69		var ch uint16
    70		for ch = 0; ch < maxLit; ch++ {
    71			var bits uint16
    72			var size uint8
    73			switch {
    74			case ch < 144:
    75				// size 8, 000110000  .. 10111111
    76				bits = ch + 48
    77				size = 8
    78				break
    79			case ch < 256:
    80				// size 9, 110010000 .. 111111111
    81				bits = ch + 400 - 144
    82				size = 9
    83				break
    84			case ch < 280:
    85				// size 7, 0000000 .. 0010111
    86				bits = ch - 256
    87				size = 7
    88				break
    89			default:
    90				// size 8, 11000000 .. 11000111
    91				bits = ch + 192 - 280
    92				size = 8
    93			}
    94			codeBits[ch] = size
    95			code[ch] = reverseBits(bits, size)
    96		}
    97		return h
    98	}
    99	
   100	func generateFixedOffsetEncoding() *huffmanEncoder {
   101		h := newHuffmanEncoder(30)
   102		codeBits := h.codeBits
   103		code := h.code
   104		for ch := uint16(0); ch < 30; ch++ {
   105			codeBits[ch] = 5
   106			code[ch] = reverseBits(ch, 5)
   107		}
   108		return h
   109	}
   110	
   111	var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
   112	var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
   113	
   114	func (h *huffmanEncoder) bitLength(freq []int32) int64 {
   115		var total int64
   116		for i, f := range freq {
   117			if f != 0 {
   118				total += int64(f) * int64(h.codeBits[i])
   119			}
   120		}
   121		return total
   122	}
   123	
   124	// Return the number of literals assigned to each bit size in the Huffman encoding
   125	//
   126	// This method is only called when list.length >= 3
   127	// The cases of 0, 1, and 2 literals are handled by special case code.
   128	//
   129	// list  An array of the literals with non-zero frequencies
   130	//             and their associated frequencies.  The array is in order of increasing
   131	//             frequency, and has as its last element a special element with frequency
   132	//             MaxInt32
   133	// maxBits     The maximum number of bits that should be used to encode any literal.
   134	// return      An integer array in which array[i] indicates the number of literals
   135	//             that should be encoded in i bits.
   136	func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
   137		n := int32(len(list))
   138		list = list[0 : n+1]
   139		list[n] = maxNode()
   140	
   141		// The tree can't have greater depth than n - 1, no matter what.  This
   142		// saves a little bit of work in some small cases
   143		if maxBits > n-1 {
   144			maxBits = n - 1
   145		}
   146	
   147		// Create information about each of the levels.
   148		// A bogus "Level 0" whose sole purpose is so that
   149		// level1.prev.needed==0.  This makes level1.nextPairFreq
   150		// be a legitimate value that never gets chosen.
   151		top := &levelInfo{needed: 0}
   152		chain2 := &chain{list[1].freq, 2, new(chain)}
   153		for level := int32(1); level <= maxBits; level++ {
   154			// For every level, the first two items are the first two characters.
   155			// We initialize the levels as if we had already figured this out.
   156			top = &levelInfo{
   157				level:        level,
   158				lastChain:    chain2,
   159				nextCharFreq: list[2].freq,
   160				nextPairFreq: list[0].freq + list[1].freq,
   161				down:         top,
   162			}
   163			top.down.up = top
   164			if level == 1 {
   165				top.nextPairFreq = math.MaxInt32
   166			}
   167		}
   168	
   169		// We need a total of 2*n - 2 items at top level and have already generated 2.
   170		top.needed = 2*n - 4
   171	
   172		l := top
   173		for {
   174			if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
   175				// We've run out of both leafs and pairs.
   176				// End all calculations for this level.
   177				// To m sure we never come back to this level or any lower level,
   178				// set nextPairFreq impossibly large.
   179				l.lastChain = nil
   180				l.needed = 0
   181				l = l.up
   182				l.nextPairFreq = math.MaxInt32
   183				continue
   184			}
   185	
   186			prevFreq := l.lastChain.freq
   187			if l.nextCharFreq < l.nextPairFreq {
   188				// The next item on this row is a leaf node.
   189				n := l.lastChain.leafCount + 1
   190				l.lastChain = &chain{l.nextCharFreq, n, l.lastChain.up}
   191				l.nextCharFreq = list[n].freq
   192			} else {
   193				// The next item on this row is a pair from the previous row.
   194				// nextPairFreq isn't valid until we generate two
   195				// more values in the level below
   196				l.lastChain = &chain{l.nextPairFreq, l.lastChain.leafCount, l.down.lastChain}
   197				l.down.needed = 2
   198			}
   199	
   200			if l.needed--; l.needed == 0 {
   201				// We've done everything we need to do for this level.
   202				// Continue calculating one level up.  Fill in nextPairFreq
   203				// of that level with the sum of the two nodes we've just calculated on
   204				// this level.
   205				up := l.up
   206				if up == nil {
   207					// All done!
   208					break
   209				}
   210				up.nextPairFreq = prevFreq + l.lastChain.freq
   211				l = up
   212			} else {
   213				// If we stole from below, move down temporarily to replenish it.
   214				for l.down.needed > 0 {
   215					l = l.down
   216				}
   217			}
   218		}
   219	
   220		// Somethings is wrong if at the end, the top level is null or hasn't used
   221		// all of the leaves.
   222		if top.lastChain.leafCount != n {
   223			panic("top.lastChain.leafCount != n")
   224		}
   225	
   226		bitCount := make([]int32, maxBits+1)
   227		bits := 1
   228		for chain := top.lastChain; chain.up != nil; chain = chain.up {
   229			// chain.leafCount gives the number of literals requiring at least "bits"
   230			// bits to encode.
   231			bitCount[bits] = chain.leafCount - chain.up.leafCount
   232			bits++
   233		}
   234		return bitCount
   235	}
   236	
   237	// Look at the leaves and assign them a bit count and an encoding as specified
   238	// in RFC 1951 3.2.2
   239	func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
   240		code := uint16(0)
   241		for n, bits := range bitCount {
   242			code <<= 1
   243			if n == 0 || bits == 0 {
   244				continue
   245			}
   246			// The literals list[len(list)-bits] .. list[len(list)-bits]
   247			// are encoded using "bits" bits, and get the values
   248			// code, code + 1, ....  The code values are
   249			// assigned in literal order (not frequency order).
   250			chunk := list[len(list)-int(bits):]
   251			sortByLiteral(chunk)
   252			for _, node := range chunk {
   253				h.codeBits[node.literal] = uint8(n)
   254				h.code[node.literal] = reverseBits(code, uint8(n))
   255				code++
   256			}
   257			list = list[0 : len(list)-int(bits)]
   258		}
   259	}
   260	
   261	// Update this Huffman Code object to be the minimum code for the specified frequency count.
   262	//
   263	// freq  An array of frequencies, in which frequency[i] gives the frequency of literal i.
   264	// maxBits  The maximum number of bits to use for any literal.
   265	func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
   266		list := make([]literalNode, len(freq)+1)
   267		// Number of non-zero literals
   268		count := 0
   269		// Set list to be the set of all non-zero literals and their frequencies
   270		for i, f := range freq {
   271			if f != 0 {
   272				list[count] = literalNode{uint16(i), f}
   273				count++
   274			} else {
   275				h.codeBits[i] = 0
   276			}
   277		}
   278		// If freq[] is shorter than codeBits[], fill rest of codeBits[] with zeros
   279		h.codeBits = h.codeBits[0:len(freq)]
   280		list = list[0:count]
   281		if count <= 2 {
   282			// Handle the small cases here, because they are awkward for the general case code.  With
   283			// two or fewer literals, everything has bit length 1.
   284			for i, node := range list {
   285				// "list" is in order of increasing literal value.
   286				h.codeBits[node.literal] = 1
   287				h.code[node.literal] = uint16(i)
   288			}
   289			return
   290		}
   291		sortByFreq(list)
   292	
   293		// Get the number of literals for each bit count
   294		bitCount := h.bitCounts(list, maxBits)
   295		// And do the assignment
   296		h.assignEncodingAndSize(bitCount, list)
   297	}
   298	
   299	type literalNodeSorter struct {
   300		a    []literalNode
   301		less func(i, j int) bool
   302	}
   303	
   304	func (s literalNodeSorter) Len() int { return len(s.a) }
   305	
   306	func (s literalNodeSorter) Less(i, j int) bool {
   307		return s.less(i, j)
   308	}
   309	
   310	func (s literalNodeSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] }
   311	
   312	func sortByFreq(a []literalNode) {
   313		s := &literalNodeSorter{a, func(i, j int) bool {
   314			if a[i].freq == a[j].freq {
   315				return a[i].literal < a[j].literal
   316			}
   317			return a[i].freq < a[j].freq
   318		}}
   319		sort.Sort(s)
   320	}
   321	
   322	func sortByLiteral(a []literalNode) {
   323		s := &literalNodeSorter{a, func(i, j int) bool { return a[i].literal < a[j].literal }}
   324		sort.Sort(s)
   325	}