1/* (c) Magnus Auvinen. See licence.txt in the root of the distribution for more information. */
2/* If you are missing that file, acquire a complete release at teeworlds.com. */
3#include "huffman.h"
4#include <algorithm>
5#include <base/system.h>
6
7const unsigned CHuffman::ms_aFreqTable[HUFFMAN_MAX_SYMBOLS] = {
8 1 << 30, 4545, 2657, 431, 1950, 919, 444, 482, 2244, 617, 838, 542, 715, 1814, 304, 240, 754, 212, 647, 186,
9 283, 131, 146, 166, 543, 164, 167, 136, 179, 859, 363, 113, 157, 154, 204, 108, 137, 180, 202, 176,
10 872, 404, 168, 134, 151, 111, 113, 109, 120, 126, 129, 100, 41, 20, 16, 22, 18, 18, 17, 19,
11 16, 37, 13, 21, 362, 166, 99, 78, 95, 88, 81, 70, 83, 284, 91, 187, 77, 68, 52, 68,
12 59, 66, 61, 638, 71, 157, 50, 46, 69, 43, 11, 24, 13, 19, 10, 12, 12, 20, 14, 9,
13 20, 20, 10, 10, 15, 15, 12, 12, 7, 19, 15, 14, 13, 18, 35, 19, 17, 14, 8, 5,
14 15, 17, 9, 15, 14, 18, 8, 10, 2173, 134, 157, 68, 188, 60, 170, 60, 194, 62, 175, 71,
15 148, 67, 167, 78, 211, 67, 156, 69, 1674, 90, 174, 53, 147, 89, 181, 51, 174, 63, 163, 80,
16 167, 94, 128, 122, 223, 153, 218, 77, 200, 110, 190, 73, 174, 69, 145, 66, 277, 143, 141, 60,
17 136, 53, 180, 57, 142, 57, 158, 61, 166, 112, 152, 92, 26, 22, 21, 28, 20, 26, 30, 21,
18 32, 27, 20, 17, 23, 21, 30, 22, 22, 21, 27, 25, 17, 27, 23, 18, 39, 26, 15, 21,
19 12, 18, 18, 27, 20, 18, 15, 19, 11, 17, 33, 12, 18, 15, 19, 18, 16, 26, 17, 18,
20 9, 10, 25, 22, 22, 17, 20, 16, 6, 16, 15, 20, 14, 18, 24, 335, 1517};
21
// Work item used while the tree is being built: it names a tree node by its index
// and carries the frequency accumulated for the subtree rooted at that node.
struct CHuffmanConstructNode
{
	unsigned short m_NodeId; // index of the tree node this entry currently stands for
	int m_Frequency; // frequency of a symbol, or the summed frequency of a merged subtree
};

// Ordering predicate for sorting construct nodes by descending frequency.
// Returns true only when pNode1 has a strictly higher frequency than pNode2;
// entries with equal frequency compare as equivalent in both directions, so a
// stable sort keeps their existing relative order.
bool CompareNodesByFrequencyDesc(const CHuffmanConstructNode *pNode1, const CHuffmanConstructNode *pNode2)
{
	return pNode1->m_Frequency > pNode2->m_Frequency;
}
32
33void CHuffman::Setbits_r(CNode *pNode, int Bits, unsigned Depth)
34{
35 if(pNode->m_aLeafs[1] != 0xffff)
36 Setbits_r(pNode: &m_aNodes[pNode->m_aLeafs[1]], Bits: Bits | (1 << Depth), Depth: Depth + 1);
37 if(pNode->m_aLeafs[0] != 0xffff)
38 Setbits_r(pNode: &m_aNodes[pNode->m_aLeafs[0]], Bits, Depth: Depth + 1);
39
40 if(pNode->m_NumBits)
41 {
42 pNode->m_Bits = Bits;
43 pNode->m_NumBits = Depth;
44 }
45}
46
47void CHuffman::ConstructTree(const unsigned *pFrequencies)
48{
49 CHuffmanConstructNode aNodesLeftStorage[HUFFMAN_MAX_SYMBOLS];
50 CHuffmanConstructNode *apNodesLeft[HUFFMAN_MAX_SYMBOLS];
51 int NumNodesLeft = HUFFMAN_MAX_SYMBOLS;
52
53 // add the symbols
54 for(int i = 0; i < HUFFMAN_MAX_SYMBOLS; i++)
55 {
56 m_aNodes[i].m_NumBits = 0xFFFFFFFF;
57 m_aNodes[i].m_Symbol = i;
58 m_aNodes[i].m_aLeafs[0] = 0xffff;
59 m_aNodes[i].m_aLeafs[1] = 0xffff;
60
61 if(i == HUFFMAN_EOF_SYMBOL)
62 aNodesLeftStorage[i].m_Frequency = 1;
63 else
64 aNodesLeftStorage[i].m_Frequency = pFrequencies[i];
65 aNodesLeftStorage[i].m_NodeId = i;
66 apNodesLeft[i] = &aNodesLeftStorage[i];
67 }
68
69 m_NumNodes = HUFFMAN_MAX_SYMBOLS;
70
71 // construct the table
72 while(NumNodesLeft > 1)
73 {
74 std::stable_sort(first: apNodesLeft, last: apNodesLeft + NumNodesLeft, comp: CompareNodesByFrequencyDesc);
75
76 m_aNodes[m_NumNodes].m_NumBits = 0;
77 m_aNodes[m_NumNodes].m_aLeafs[0] = apNodesLeft[NumNodesLeft - 1]->m_NodeId;
78 m_aNodes[m_NumNodes].m_aLeafs[1] = apNodesLeft[NumNodesLeft - 2]->m_NodeId;
79 apNodesLeft[NumNodesLeft - 2]->m_NodeId = m_NumNodes;
80 apNodesLeft[NumNodesLeft - 2]->m_Frequency = apNodesLeft[NumNodesLeft - 1]->m_Frequency + apNodesLeft[NumNodesLeft - 2]->m_Frequency;
81
82 m_NumNodes++;
83 NumNodesLeft--;
84 }
85
86 // set start node
87 m_pStartNode = &m_aNodes[m_NumNodes - 1];
88
89 // build symbol bits
90 Setbits_r(pNode: m_pStartNode, Bits: 0, Depth: 0);
91}
92
93void CHuffman::Init(const unsigned *pFrequencies)
94{
95 // make sure to cleanout every thing
96 mem_zero(block: m_aNodes, size: sizeof(m_aNodes));
97 mem_zero(block: m_apDecodeLut, size: sizeof(m_apDecodeLut));
98 m_pStartNode = 0x0;
99 m_NumNodes = 0;
100
101 // construct the tree
102 ConstructTree(pFrequencies);
103
104 // build decode LUT
105 for(int i = 0; i < HUFFMAN_LUTSIZE; i++)
106 {
107 unsigned Bits = i;
108 int k;
109 CNode *pNode = m_pStartNode;
110 for(k = 0; k < HUFFMAN_LUTBITS; k++)
111 {
112 pNode = &m_aNodes[pNode->m_aLeafs[Bits & 1]];
113 Bits >>= 1;
114
115 if(!pNode)
116 break;
117
118 if(pNode->m_NumBits)
119 {
120 m_apDecodeLut[i] = pNode;
121 break;
122 }
123 }
124
125 if(k == HUFFMAN_LUTBITS)
126 m_apDecodeLut[i] = pNode;
127 }
128}
129
130//***************************************************************
131int CHuffman::Compress(const void *pInput, int InputSize, void *pOutput, int OutputSize) const
132{
133 // this macro loads a symbol for a byte into bits and bitcount
134#define HUFFMAN_MACRO_LOADSYMBOL(Sym) \
135 do \
136 { \
137 Bits |= m_aNodes[Sym].m_Bits << Bitcount; \
138 Bitcount += m_aNodes[Sym].m_NumBits; \
139 } while(0)
140
141 // this macro writes the symbol stored in bits and bitcount to the dst pointer
142#define HUFFMAN_MACRO_WRITE() \
143 do \
144 { \
145 while(Bitcount >= 8) \
146 { \
147 *pDst++ = (unsigned char)(Bits & 0xff); \
148 if(pDst == pDstEnd) \
149 return -1; \
150 Bits >>= 8; \
151 Bitcount -= 8; \
152 } \
153 } while(0)
154
155 // setup buffer pointers
156 const unsigned char *pSrc = (const unsigned char *)pInput;
157 const unsigned char *pSrcEnd = pSrc + InputSize;
158 unsigned char *pDst = (unsigned char *)pOutput;
159 unsigned char *pDstEnd = pDst + OutputSize;
160
161 // symbol variables
162 unsigned Bits = 0;
163 unsigned Bitcount = 0;
164
165 // make sure that we have data that we want to compress
166 if(InputSize)
167 {
168 // {A} load the first symbol
169 int Symbol = *pSrc++;
170
171 while(pSrc != pSrcEnd)
172 {
173 // {B} load the symbol
174 HUFFMAN_MACRO_LOADSYMBOL(Symbol);
175
176 // {C} fetch next symbol, this is done here because it will reduce dependency in the code
177 Symbol = *pSrc++;
178
179 // {B} write the symbol loaded at
180 HUFFMAN_MACRO_WRITE();
181 }
182
183 // write the last symbol loaded from {C} or {A} in the case of only 1 byte input buffer
184 HUFFMAN_MACRO_LOADSYMBOL(Symbol);
185 HUFFMAN_MACRO_WRITE();
186 }
187
188 // write EOF symbol
189 HUFFMAN_MACRO_LOADSYMBOL(HUFFMAN_EOF_SYMBOL);
190 HUFFMAN_MACRO_WRITE();
191
192 // write out the last bits
193 *pDst++ = Bits;
194
195 // return the size of the output
196 return (int)(pDst - (const unsigned char *)pOutput);
197
198 // remove macros
199#undef HUFFMAN_MACRO_LOADSYMBOL
200#undef HUFFMAN_MACRO_WRITE
201}
202
203//***************************************************************
204int CHuffman::Decompress(const void *pInput, int InputSize, void *pOutput, int OutputSize) const
205{
206 // setup buffer pointers
207 unsigned char *pDst = (unsigned char *)pOutput;
208 unsigned char *pSrc = (unsigned char *)pInput;
209 unsigned char *pDstEnd = pDst + OutputSize;
210 unsigned char *pSrcEnd = pSrc + InputSize;
211
212 unsigned Bits = 0;
213 unsigned Bitcount = 0;
214
215 const CNode *pEof = &m_aNodes[HUFFMAN_EOF_SYMBOL];
216
217 while(true)
218 {
219 // {A} try to load a node now, this will reduce dependency at location {D}
220 const CNode *pNode = 0;
221 if(Bitcount >= HUFFMAN_LUTBITS)
222 pNode = m_apDecodeLut[Bits & HUFFMAN_LUTMASK];
223
224 // {B} fill with new bits
225 while(Bitcount < 24 && pSrc != pSrcEnd)
226 {
227 Bits |= (*pSrc++) << Bitcount;
228 Bitcount += 8;
229 }
230
231 // {C} load symbol now if we didn't that earlier at location {A}
232 if(!pNode)
233 pNode = m_apDecodeLut[Bits & HUFFMAN_LUTMASK];
234
235 if(!pNode)
236 return -1;
237
238 // {D} check if we hit a symbol already
239 if(pNode->m_NumBits)
240 {
241 // remove the bits for that symbol
242 Bits >>= pNode->m_NumBits;
243 Bitcount -= pNode->m_NumBits;
244 }
245 else
246 {
247 // remove the bits that the lut checked up for us
248 Bits >>= HUFFMAN_LUTBITS;
249 Bitcount -= HUFFMAN_LUTBITS;
250
251 // walk the tree bit by bit
252 while(true)
253 {
254 // traverse tree
255 pNode = &m_aNodes[pNode->m_aLeafs[Bits & 1]];
256
257 // remove bit
258 Bitcount--;
259 Bits >>= 1;
260
261 // check if we hit a symbol
262 if(pNode->m_NumBits)
263 break;
264
265 // no more bits, decoding error
266 if(Bitcount == 0)
267 return -1;
268 }
269 }
270
271 // check for eof
272 if(pNode == pEof)
273 break;
274
275 // output character
276 if(pDst == pDstEnd)
277 return -1;
278 *pDst++ = pNode->m_Symbol;
279 }
280
281 // return the size of the decompressed buffer
282 return (int)(pDst - (const unsigned char *)pOutput);
283}
284