Source file src/pkg/compress/bzip2/huffman.go
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package bzip2
6
7 import "sort"
8
9 // A huffmanTree is a binary tree which is navigated, bit-by-bit to reach a
10 // symbol.
11 type huffmanTree struct {
12 // nodes contains all the non-leaf nodes in the tree. nodes[0] is the
13 // root of the tree and nextNode contains the index of the next element
14 // of nodes to use when the tree is being constructed.
15 nodes []huffmanNode
16 nextNode int
17 }
18
19 // A huffmanNode is a node in the tree. left and right contain indexes into the
20 // nodes slice of the tree. If left or right is invalidNodeValue then the child
21 // is a left node and its value is in leftValue/rightValue.
22 //
23 // The symbols are uint16s because bzip2 encodes not only MTF indexes in the
24 // tree, but also two magic values for run-length encoding and an EOF symbol.
25 // Thus there are more than 256 possible symbols.
26 type huffmanNode struct {
27 left, right uint16
28 leftValue, rightValue uint16
29 }
30
31 // invalidNodeValue is an invalid index which marks a leaf node in the tree.
32 const invalidNodeValue = 0xffff
33
34 // Decode reads bits from the given bitReader and navigates the tree until a
35 // symbol is found.
36 func (t huffmanTree) Decode(br *bitReader) (v uint16) {
37 nodeIndex := uint16(0) // node 0 is the root of the tree.
38
39 for {
40 node := &t.nodes[nodeIndex]
41 bit := br.ReadBit()
42 // bzip2 encodes left as a true bit.
43 if bit {
44 // left
45 if node.left == invalidNodeValue {
46 return node.leftValue
47 }
48 nodeIndex = node.left
49 } else {
50 // right
51 if node.right == invalidNodeValue {
52 return node.rightValue
53 }
54 nodeIndex = node.right
55 }
56 }
57
58 panic("unreachable")
59 }
60
61 // newHuffmanTree builds a Huffman tree from a slice containing the code
62 // lengths of each symbol. The maximum code length is 32 bits.
63 func newHuffmanTree(lengths []uint8) (huffmanTree, error) {
64 // There are many possible trees that assign the same code length to
65 // each symbol (consider reflecting a tree down the middle, for
66 // example). Since the code length assignments determine the
67 // efficiency of the tree, each of these trees is equally good. In
68 // order to minimize the amount of information needed to build a tree
69 // bzip2 uses a canonical tree so that it can be reconstructed given
70 // only the code length assignments.
71
72 if len(lengths) < 2 {
73 panic("newHuffmanTree: too few symbols")
74 }
75
76 var t huffmanTree
77
78 // First we sort the code length assignments by ascending code length,
79 // using the symbol value to break ties.
80 pairs := huffmanSymbolLengthPairs(make([]huffmanSymbolLengthPair, len(lengths)))
81 for i, length := range lengths {
82 pairs[i].value = uint16(i)
83 pairs[i].length = length
84 }
85
86 sort.Sort(pairs)
87
88 // Now we assign codes to the symbols, starting with the longest code.
89 // We keep the codes packed into a uint32, at the most-significant end.
90 // So branches are taken from the MSB downwards. This makes it easy to
91 // sort them later.
92 code := uint32(0)
93 length := uint8(32)
94
95 codes := huffmanCodes(make([]huffmanCode, len(lengths)))
96 for i := len(pairs) - 1; i >= 0; i-- {
97 if length > pairs[i].length {
98 // If the code length decreases we shift in order to
99 // zero any bits beyond the end of the code.
100 length >>= 32 - pairs[i].length
101 length <<= 32 - pairs[i].length
102 length = pairs[i].length
103 }
104 codes[i].code = code
105 codes[i].codeLen = length
106 codes[i].value = pairs[i].value
107 // We need to 'increment' the code, which means treating |code|
108 // like a |length| bit number.
109 code += 1 << (32 - length)
110 }
111
112 // Now we can sort by the code so that the left half of each branch are
113 // grouped together, recursively.
114 sort.Sort(codes)
115
116 t.nodes = make([]huffmanNode, len(codes))
117 _, err := buildHuffmanNode(&t, codes, 0)
118 return t, err
119 }
120
121 // huffmanSymbolLengthPair contains a symbol and its code length.
122 type huffmanSymbolLengthPair struct {
123 value uint16
124 length uint8
125 }
126
127 // huffmanSymbolLengthPair is used to provide an interface for sorting.
128 type huffmanSymbolLengthPairs []huffmanSymbolLengthPair
129
130 func (h huffmanSymbolLengthPairs) Len() int {
131 return len(h)
132 }
133
134 func (h huffmanSymbolLengthPairs) Less(i, j int) bool {
135 if h[i].length < h[j].length {
136 return true
137 }
138 if h[i].length > h[j].length {
139 return false
140 }
141 if h[i].value < h[j].value {
142 return true
143 }
144 return false
145 }
146
147 func (h huffmanSymbolLengthPairs) Swap(i, j int) {
148 h[i], h[j] = h[j], h[i]
149 }
150
151 // huffmanCode contains a symbol, its code and code length.
152 type huffmanCode struct {
153 code uint32
154 codeLen uint8
155 value uint16
156 }
157
158 // huffmanCodes is used to provide an interface for sorting.
159 type huffmanCodes []huffmanCode
160
161 func (n huffmanCodes) Len() int {
162 return len(n)
163 }
164
165 func (n huffmanCodes) Less(i, j int) bool {
166 return n[i].code < n[j].code
167 }
168
169 func (n huffmanCodes) Swap(i, j int) {
170 n[i], n[j] = n[j], n[i]
171 }
172
173 // buildHuffmanNode takes a slice of sorted huffmanCodes and builds a node in
174 // the Huffman tree at the given level. It returns the index of the newly
175 // constructed node.
176 func buildHuffmanNode(t *huffmanTree, codes []huffmanCode, level uint32) (nodeIndex uint16, err error) {
177 test := uint32(1) << (31 - level)
178
179 // We have to search the list of codes to find the divide between the left and right sides.
180 firstRightIndex := len(codes)
181 for i, code := range codes {
182 if code.code&test != 0 {
183 firstRightIndex = i
184 break
185 }
186 }
187
188 left := codes[:firstRightIndex]
189 right := codes[firstRightIndex:]
190
191 if len(left) == 0 || len(right) == 0 {
192 return 0, StructuralError("superfluous level in Huffman tree")
193 }
194
195 nodeIndex = uint16(t.nextNode)
196 node := &t.nodes[t.nextNode]
197 t.nextNode++
198
199 if len(left) == 1 {
200 // leaf node
201 node.left = invalidNodeValue
202 node.leftValue = left[0].value
203 } else {
204 node.left, err = buildHuffmanNode(t, left, level+1)
205 }
206
207 if err != nil {
208 return
209 }
210
211 if len(right) == 1 {
212 // leaf node
213 node.right = invalidNodeValue
214 node.rightValue = right[0].value
215 } else {
216 node.right, err = buildHuffmanNode(t, right, level+1)
217 }
218
219 return
220 }