1 | /* (c) Magnus Auvinen. See licence.txt in the root of the distribution for more information. */ |
2 | /* If you are missing that file, acquire a complete release at teeworlds.com. */ |
3 | #include "huffman.h" |
4 | #include <algorithm> |
5 | #include <base/system.h> |
6 | |
7 | const unsigned CHuffman::ms_aFreqTable[HUFFMAN_MAX_SYMBOLS] = { |
8 | 1 << 30, 4545, 2657, 431, 1950, 919, 444, 482, 2244, 617, 838, 542, 715, 1814, 304, 240, 754, 212, 647, 186, |
9 | 283, 131, 146, 166, 543, 164, 167, 136, 179, 859, 363, 113, 157, 154, 204, 108, 137, 180, 202, 176, |
10 | 872, 404, 168, 134, 151, 111, 113, 109, 120, 126, 129, 100, 41, 20, 16, 22, 18, 18, 17, 19, |
11 | 16, 37, 13, 21, 362, 166, 99, 78, 95, 88, 81, 70, 83, 284, 91, 187, 77, 68, 52, 68, |
12 | 59, 66, 61, 638, 71, 157, 50, 46, 69, 43, 11, 24, 13, 19, 10, 12, 12, 20, 14, 9, |
13 | 20, 20, 10, 10, 15, 15, 12, 12, 7, 19, 15, 14, 13, 18, 35, 19, 17, 14, 8, 5, |
14 | 15, 17, 9, 15, 14, 18, 8, 10, 2173, 134, 157, 68, 188, 60, 170, 60, 194, 62, 175, 71, |
15 | 148, 67, 167, 78, 211, 67, 156, 69, 1674, 90, 174, 53, 147, 89, 181, 51, 174, 63, 163, 80, |
16 | 167, 94, 128, 122, 223, 153, 218, 77, 200, 110, 190, 73, 174, 69, 145, 66, 277, 143, 141, 60, |
17 | 136, 53, 180, 57, 142, 57, 158, 61, 166, 112, 152, 92, 26, 22, 21, 28, 20, 26, 30, 21, |
18 | 32, 27, 20, 17, 23, 21, 30, 22, 22, 21, 27, 25, 17, 27, 23, 18, 39, 26, 15, 21, |
19 | 12, 18, 18, 27, 20, 18, 15, 19, 11, 17, 33, 12, 18, 15, 19, 18, 16, 26, 17, 18, |
20 | 9, 10, 25, 22, 22, 17, 20, 16, 6, 16, 15, 20, 14, 18, 24, 335, 1517}; |
21 | |
// Scratch record used by CHuffman::ConstructTree() while merging nodes:
// it pairs an index into the node array with the combined weight of the
// subtree rooted at that node.
struct CHuffmanConstructNode
{
	unsigned short m_NodeId; // index of the node this record stands for
	int m_Frequency; // weight of that node (sum of its subtree's weights)
};

// Ordering predicate for sorting construct nodes by descending frequency.
// Returns true only when pNode1 is strictly heavier than pNode2, so two nodes
// of equal weight compare as equivalent in both directions.
bool CompareNodesByFrequencyDesc(const CHuffmanConstructNode *pNode1, const CHuffmanConstructNode *pNode2)
{
	const int FirstWeight = pNode1->m_Frequency;
	const int SecondWeight = pNode2->m_Frequency;
	return FirstWeight > SecondWeight;
}
32 | |
33 | void CHuffman::Setbits_r(CNode *pNode, int Bits, unsigned Depth) |
34 | { |
35 | if(pNode->m_aLeafs[1] != 0xffff) |
36 | Setbits_r(pNode: &m_aNodes[pNode->m_aLeafs[1]], Bits: Bits | (1 << Depth), Depth: Depth + 1); |
37 | if(pNode->m_aLeafs[0] != 0xffff) |
38 | Setbits_r(pNode: &m_aNodes[pNode->m_aLeafs[0]], Bits, Depth: Depth + 1); |
39 | |
40 | if(pNode->m_NumBits) |
41 | { |
42 | pNode->m_Bits = Bits; |
43 | pNode->m_NumBits = Depth; |
44 | } |
45 | } |
46 | |
47 | void CHuffman::ConstructTree(const unsigned *pFrequencies) |
48 | { |
49 | CHuffmanConstructNode aNodesLeftStorage[HUFFMAN_MAX_SYMBOLS]; |
50 | CHuffmanConstructNode *apNodesLeft[HUFFMAN_MAX_SYMBOLS]; |
51 | int NumNodesLeft = HUFFMAN_MAX_SYMBOLS; |
52 | |
53 | // add the symbols |
54 | for(int i = 0; i < HUFFMAN_MAX_SYMBOLS; i++) |
55 | { |
56 | m_aNodes[i].m_NumBits = 0xFFFFFFFF; |
57 | m_aNodes[i].m_Symbol = i; |
58 | m_aNodes[i].m_aLeafs[0] = 0xffff; |
59 | m_aNodes[i].m_aLeafs[1] = 0xffff; |
60 | |
61 | if(i == HUFFMAN_EOF_SYMBOL) |
62 | aNodesLeftStorage[i].m_Frequency = 1; |
63 | else |
64 | aNodesLeftStorage[i].m_Frequency = pFrequencies[i]; |
65 | aNodesLeftStorage[i].m_NodeId = i; |
66 | apNodesLeft[i] = &aNodesLeftStorage[i]; |
67 | } |
68 | |
69 | m_NumNodes = HUFFMAN_MAX_SYMBOLS; |
70 | |
71 | // construct the table |
72 | while(NumNodesLeft > 1) |
73 | { |
74 | std::stable_sort(first: apNodesLeft, last: apNodesLeft + NumNodesLeft, comp: CompareNodesByFrequencyDesc); |
75 | |
76 | m_aNodes[m_NumNodes].m_NumBits = 0; |
77 | m_aNodes[m_NumNodes].m_aLeafs[0] = apNodesLeft[NumNodesLeft - 1]->m_NodeId; |
78 | m_aNodes[m_NumNodes].m_aLeafs[1] = apNodesLeft[NumNodesLeft - 2]->m_NodeId; |
79 | apNodesLeft[NumNodesLeft - 2]->m_NodeId = m_NumNodes; |
80 | apNodesLeft[NumNodesLeft - 2]->m_Frequency = apNodesLeft[NumNodesLeft - 1]->m_Frequency + apNodesLeft[NumNodesLeft - 2]->m_Frequency; |
81 | |
82 | m_NumNodes++; |
83 | NumNodesLeft--; |
84 | } |
85 | |
86 | // set start node |
87 | m_pStartNode = &m_aNodes[m_NumNodes - 1]; |
88 | |
89 | // build symbol bits |
90 | Setbits_r(pNode: m_pStartNode, Bits: 0, Depth: 0); |
91 | } |
92 | |
93 | void CHuffman::Init(const unsigned *pFrequencies) |
94 | { |
95 | // make sure to cleanout every thing |
96 | mem_zero(block: m_aNodes, size: sizeof(m_aNodes)); |
97 | mem_zero(block: m_apDecodeLut, size: sizeof(m_apDecodeLut)); |
98 | m_pStartNode = 0x0; |
99 | m_NumNodes = 0; |
100 | |
101 | // construct the tree |
102 | ConstructTree(pFrequencies); |
103 | |
104 | // build decode LUT |
105 | for(int i = 0; i < HUFFMAN_LUTSIZE; i++) |
106 | { |
107 | unsigned Bits = i; |
108 | int k; |
109 | CNode *pNode = m_pStartNode; |
110 | for(k = 0; k < HUFFMAN_LUTBITS; k++) |
111 | { |
112 | pNode = &m_aNodes[pNode->m_aLeafs[Bits & 1]]; |
113 | Bits >>= 1; |
114 | |
115 | if(!pNode) |
116 | break; |
117 | |
118 | if(pNode->m_NumBits) |
119 | { |
120 | m_apDecodeLut[i] = pNode; |
121 | break; |
122 | } |
123 | } |
124 | |
125 | if(k == HUFFMAN_LUTBITS) |
126 | m_apDecodeLut[i] = pNode; |
127 | } |
128 | } |
129 | |
130 | //*************************************************************** |
131 | int CHuffman::Compress(const void *pInput, int InputSize, void *pOutput, int OutputSize) const |
132 | { |
133 | // this macro loads a symbol for a byte into bits and bitcount |
134 | #define HUFFMAN_MACRO_LOADSYMBOL(Sym) \ |
135 | do \ |
136 | { \ |
137 | Bits |= m_aNodes[Sym].m_Bits << Bitcount; \ |
138 | Bitcount += m_aNodes[Sym].m_NumBits; \ |
139 | } while(0) |
140 | |
141 | // this macro writes the symbol stored in bits and bitcount to the dst pointer |
142 | #define HUFFMAN_MACRO_WRITE() \ |
143 | do \ |
144 | { \ |
145 | while(Bitcount >= 8) \ |
146 | { \ |
147 | *pDst++ = (unsigned char)(Bits & 0xff); \ |
148 | if(pDst == pDstEnd) \ |
149 | return -1; \ |
150 | Bits >>= 8; \ |
151 | Bitcount -= 8; \ |
152 | } \ |
153 | } while(0) |
154 | |
155 | // setup buffer pointers |
156 | const unsigned char *pSrc = (const unsigned char *)pInput; |
157 | const unsigned char *pSrcEnd = pSrc + InputSize; |
158 | unsigned char *pDst = (unsigned char *)pOutput; |
159 | unsigned char *pDstEnd = pDst + OutputSize; |
160 | |
161 | // symbol variables |
162 | unsigned Bits = 0; |
163 | unsigned Bitcount = 0; |
164 | |
165 | // make sure that we have data that we want to compress |
166 | if(InputSize) |
167 | { |
168 | // {A} load the first symbol |
169 | int Symbol = *pSrc++; |
170 | |
171 | while(pSrc != pSrcEnd) |
172 | { |
173 | // {B} load the symbol |
174 | HUFFMAN_MACRO_LOADSYMBOL(Symbol); |
175 | |
176 | // {C} fetch next symbol, this is done here because it will reduce dependency in the code |
177 | Symbol = *pSrc++; |
178 | |
179 | // {B} write the symbol loaded at |
180 | HUFFMAN_MACRO_WRITE(); |
181 | } |
182 | |
183 | // write the last symbol loaded from {C} or {A} in the case of only 1 byte input buffer |
184 | HUFFMAN_MACRO_LOADSYMBOL(Symbol); |
185 | HUFFMAN_MACRO_WRITE(); |
186 | } |
187 | |
188 | // write EOF symbol |
189 | HUFFMAN_MACRO_LOADSYMBOL(HUFFMAN_EOF_SYMBOL); |
190 | HUFFMAN_MACRO_WRITE(); |
191 | |
192 | // write out the last bits |
193 | *pDst++ = Bits; |
194 | |
195 | // return the size of the output |
196 | return (int)(pDst - (const unsigned char *)pOutput); |
197 | |
198 | // remove macros |
199 | #undef HUFFMAN_MACRO_LOADSYMBOL |
200 | #undef HUFFMAN_MACRO_WRITE |
201 | } |
202 | |
203 | //*************************************************************** |
204 | int CHuffman::Decompress(const void *pInput, int InputSize, void *pOutput, int OutputSize) const |
205 | { |
206 | // setup buffer pointers |
207 | unsigned char *pDst = (unsigned char *)pOutput; |
208 | unsigned char *pSrc = (unsigned char *)pInput; |
209 | unsigned char *pDstEnd = pDst + OutputSize; |
210 | unsigned char *pSrcEnd = pSrc + InputSize; |
211 | |
212 | unsigned Bits = 0; |
213 | unsigned Bitcount = 0; |
214 | |
215 | const CNode *pEof = &m_aNodes[HUFFMAN_EOF_SYMBOL]; |
216 | |
217 | while(true) |
218 | { |
219 | // {A} try to load a node now, this will reduce dependency at location {D} |
220 | const CNode *pNode = 0; |
221 | if(Bitcount >= HUFFMAN_LUTBITS) |
222 | pNode = m_apDecodeLut[Bits & HUFFMAN_LUTMASK]; |
223 | |
224 | // {B} fill with new bits |
225 | while(Bitcount < 24 && pSrc != pSrcEnd) |
226 | { |
227 | Bits |= (*pSrc++) << Bitcount; |
228 | Bitcount += 8; |
229 | } |
230 | |
231 | // {C} load symbol now if we didn't that earlier at location {A} |
232 | if(!pNode) |
233 | pNode = m_apDecodeLut[Bits & HUFFMAN_LUTMASK]; |
234 | |
235 | if(!pNode) |
236 | return -1; |
237 | |
238 | // {D} check if we hit a symbol already |
239 | if(pNode->m_NumBits) |
240 | { |
241 | // remove the bits for that symbol |
242 | Bits >>= pNode->m_NumBits; |
243 | Bitcount -= pNode->m_NumBits; |
244 | } |
245 | else |
246 | { |
247 | // remove the bits that the lut checked up for us |
248 | Bits >>= HUFFMAN_LUTBITS; |
249 | Bitcount -= HUFFMAN_LUTBITS; |
250 | |
251 | // walk the tree bit by bit |
252 | while(true) |
253 | { |
254 | // traverse tree |
255 | pNode = &m_aNodes[pNode->m_aLeafs[Bits & 1]]; |
256 | |
257 | // remove bit |
258 | Bitcount--; |
259 | Bits >>= 1; |
260 | |
261 | // check if we hit a symbol |
262 | if(pNode->m_NumBits) |
263 | break; |
264 | |
265 | // no more bits, decoding error |
266 | if(Bitcount == 0) |
267 | return -1; |
268 | } |
269 | } |
270 | |
271 | // check for eof |
272 | if(pNode == pEof) |
273 | break; |
274 | |
275 | // output character |
276 | if(pDst == pDstEnd) |
277 | return -1; |
278 | *pDst++ = pNode->m_Symbol; |
279 | } |
280 | |
281 | // return the size of the decompressed buffer |
282 | return (int)(pDst - (const unsigned char *)pOutput); |
283 | } |
284 | |