1/* (c) Magnus Auvinen. See licence.txt in the root of the distribution for more information. */
2/* If you are missing that file, acquire a complete release at teeworlds.com. */
3#include "huffman.h"
4
5#include <base/system.h>
6
7#include <algorithm>
8
9const unsigned CHuffman::ms_aFreqTable[HUFFMAN_MAX_SYMBOLS] = {
10 1 << 30, 4545, 2657, 431, 1950, 919, 444, 482, 2244, 617, 838, 542, 715, 1814, 304, 240, 754, 212, 647, 186,
11 283, 131, 146, 166, 543, 164, 167, 136, 179, 859, 363, 113, 157, 154, 204, 108, 137, 180, 202, 176,
12 872, 404, 168, 134, 151, 111, 113, 109, 120, 126, 129, 100, 41, 20, 16, 22, 18, 18, 17, 19,
13 16, 37, 13, 21, 362, 166, 99, 78, 95, 88, 81, 70, 83, 284, 91, 187, 77, 68, 52, 68,
14 59, 66, 61, 638, 71, 157, 50, 46, 69, 43, 11, 24, 13, 19, 10, 12, 12, 20, 14, 9,
15 20, 20, 10, 10, 15, 15, 12, 12, 7, 19, 15, 14, 13, 18, 35, 19, 17, 14, 8, 5,
16 15, 17, 9, 15, 14, 18, 8, 10, 2173, 134, 157, 68, 188, 60, 170, 60, 194, 62, 175, 71,
17 148, 67, 167, 78, 211, 67, 156, 69, 1674, 90, 174, 53, 147, 89, 181, 51, 174, 63, 163, 80,
18 167, 94, 128, 122, 223, 153, 218, 77, 200, 110, 190, 73, 174, 69, 145, 66, 277, 143, 141, 60,
19 136, 53, 180, 57, 142, 57, 158, 61, 166, 112, 152, 92, 26, 22, 21, 28, 20, 26, 30, 21,
20 32, 27, 20, 17, 23, 21, 30, 22, 22, 21, 27, 25, 17, 27, 23, 18, 39, 26, 15, 21,
21 12, 18, 18, 27, 20, 18, 15, 19, 11, 17, 33, 12, 18, 15, 19, 18, 16, 26, 17, 18,
22 9, 10, 25, 22, 22, 17, 20, 16, 6, 16, 15, 20, 14, 18, 24, 335, 1517};
23
24struct CHuffmanConstructNode
25{
26 unsigned short m_NodeId;
27 int m_Frequency;
28};
29
30static bool CompareNodesByFrequencyDesc(const CHuffmanConstructNode *pNode1, const CHuffmanConstructNode *pNode2)
31{
32 return pNode2->m_Frequency < pNode1->m_Frequency;
33}
34
35void CHuffman::Setbits_r(CNode *pNode, int Bits, unsigned Depth)
36{
37 if(pNode->m_aLeaves[1] != 0xffff)
38 Setbits_r(pNode: &m_aNodes[pNode->m_aLeaves[1]], Bits: Bits | (1 << Depth), Depth: Depth + 1);
39 if(pNode->m_aLeaves[0] != 0xffff)
40 Setbits_r(pNode: &m_aNodes[pNode->m_aLeaves[0]], Bits, Depth: Depth + 1);
41
42 if(pNode->m_NumBits)
43 {
44 pNode->m_Bits = Bits;
45 pNode->m_NumBits = Depth;
46 }
47}
48
49void CHuffman::ConstructTree(const unsigned *pFrequencies)
50{
51 CHuffmanConstructNode aNodesLeftStorage[HUFFMAN_MAX_SYMBOLS];
52 CHuffmanConstructNode *apNodesLeft[HUFFMAN_MAX_SYMBOLS];
53 int NumNodesLeft = HUFFMAN_MAX_SYMBOLS;
54
55 // add the symbols
56 for(int i = 0; i < HUFFMAN_MAX_SYMBOLS; i++)
57 {
58 m_aNodes[i].m_NumBits = 0xFFFFFFFF;
59 m_aNodes[i].m_Symbol = i;
60 m_aNodes[i].m_aLeaves[0] = 0xffff;
61 m_aNodes[i].m_aLeaves[1] = 0xffff;
62
63 if(i == HUFFMAN_EOF_SYMBOL)
64 aNodesLeftStorage[i].m_Frequency = 1;
65 else
66 aNodesLeftStorage[i].m_Frequency = pFrequencies[i];
67 aNodesLeftStorage[i].m_NodeId = i;
68 apNodesLeft[i] = &aNodesLeftStorage[i];
69 }
70
71 m_NumNodes = HUFFMAN_MAX_SYMBOLS;
72
73 // construct the table
74 while(NumNodesLeft > 1)
75 {
76 std::stable_sort(first: apNodesLeft, last: apNodesLeft + NumNodesLeft, comp: CompareNodesByFrequencyDesc);
77
78 m_aNodes[m_NumNodes].m_NumBits = 0;
79 m_aNodes[m_NumNodes].m_aLeaves[0] = apNodesLeft[NumNodesLeft - 1]->m_NodeId;
80 m_aNodes[m_NumNodes].m_aLeaves[1] = apNodesLeft[NumNodesLeft - 2]->m_NodeId;
81 apNodesLeft[NumNodesLeft - 2]->m_NodeId = m_NumNodes;
82 apNodesLeft[NumNodesLeft - 2]->m_Frequency = apNodesLeft[NumNodesLeft - 1]->m_Frequency + apNodesLeft[NumNodesLeft - 2]->m_Frequency;
83
84 m_NumNodes++;
85 NumNodesLeft--;
86 }
87
88 // set start node
89 m_pStartNode = &m_aNodes[m_NumNodes - 1];
90
91 // build symbol bits
92 Setbits_r(pNode: m_pStartNode, Bits: 0, Depth: 0);
93}
94
95void CHuffman::Init(const unsigned *pFrequencies)
96{
97 // make sure to cleanout every thing
98 mem_zero(block: m_aNodes, size: sizeof(m_aNodes));
99 mem_zero(block: m_apDecodeLut, size: sizeof(m_apDecodeLut));
100 m_pStartNode = nullptr;
101 m_NumNodes = 0;
102
103 // construct the tree
104 ConstructTree(pFrequencies);
105
106 // build decode LUT
107 for(int i = 0; i < HUFFMAN_LUTSIZE; i++)
108 {
109 unsigned Bits = i;
110 int k;
111 CNode *pNode = m_pStartNode;
112 for(k = 0; k < HUFFMAN_LUTBITS; k++)
113 {
114 pNode = &m_aNodes[pNode->m_aLeaves[Bits & 1]];
115 Bits >>= 1;
116
117 if(!pNode)
118 break;
119
120 if(pNode->m_NumBits)
121 {
122 m_apDecodeLut[i] = pNode;
123 break;
124 }
125 }
126
127 if(k == HUFFMAN_LUTBITS)
128 m_apDecodeLut[i] = pNode;
129 }
130}
131
132//***************************************************************
133int CHuffman::Compress(const void *pInput, int InputSize, void *pOutput, int OutputSize) const
134{
135 // this macro loads a symbol for a byte into bits and bitcount
136#define HUFFMAN_MACRO_LOADSYMBOL(Sym) \
137 do \
138 { \
139 Bits |= m_aNodes[Sym].m_Bits << Bitcount; \
140 Bitcount += m_aNodes[Sym].m_NumBits; \
141 } while(0)
142
143 // this macro writes the symbol stored in bits and bitcount to the dst pointer
144#define HUFFMAN_MACRO_WRITE() \
145 do \
146 { \
147 while(Bitcount >= 8) \
148 { \
149 *pDst++ = (unsigned char)(Bits & 0xff); \
150 if(pDst == pDstEnd) \
151 return -1; \
152 Bits >>= 8; \
153 Bitcount -= 8; \
154 } \
155 } while(0)
156
157 // setup buffer pointers
158 const unsigned char *pSrc = (const unsigned char *)pInput;
159 const unsigned char *pSrcEnd = pSrc + InputSize;
160 unsigned char *pDst = (unsigned char *)pOutput;
161 unsigned char *pDstEnd = pDst + OutputSize;
162
163 // symbol variables
164 unsigned Bits = 0;
165 unsigned Bitcount = 0;
166
167 // make sure that we have data that we want to compress
168 if(InputSize)
169 {
170 // {A} load the first symbol
171 int Symbol = *pSrc++;
172
173 while(pSrc != pSrcEnd)
174 {
175 // {B} load the symbol
176 HUFFMAN_MACRO_LOADSYMBOL(Symbol);
177
178 // {C} fetch next symbol, this is done here because it will reduce dependency in the code
179 Symbol = *pSrc++;
180
181 // {B} write the symbol loaded at
182 HUFFMAN_MACRO_WRITE();
183 }
184
185 // write the last symbol loaded from {C} or {A} in the case of only 1 byte input buffer
186 HUFFMAN_MACRO_LOADSYMBOL(Symbol);
187 HUFFMAN_MACRO_WRITE();
188 }
189
190 // write EOF symbol
191 HUFFMAN_MACRO_LOADSYMBOL(HUFFMAN_EOF_SYMBOL);
192 HUFFMAN_MACRO_WRITE();
193
194 // write out the last bits
195 *pDst++ = Bits;
196
197 // return the size of the output
198 return (int)(pDst - (const unsigned char *)pOutput);
199
200 // remove macros
201#undef HUFFMAN_MACRO_LOADSYMBOL
202#undef HUFFMAN_MACRO_WRITE
203}
204
205//***************************************************************
206int CHuffman::Decompress(const void *pInput, int InputSize, void *pOutput, int OutputSize) const
207{
208 // setup buffer pointers
209 unsigned char *pDst = (unsigned char *)pOutput;
210 unsigned char *pSrc = (unsigned char *)pInput;
211 unsigned char *pDstEnd = pDst + OutputSize;
212 unsigned char *pSrcEnd = pSrc + InputSize;
213
214 unsigned Bits = 0;
215 unsigned Bitcount = 0;
216
217 const CNode *pEof = &m_aNodes[HUFFMAN_EOF_SYMBOL];
218
219 while(true)
220 {
221 // {A} try to load a node now, this will reduce dependency at location {D}
222 const CNode *pNode = nullptr;
223 if(Bitcount >= HUFFMAN_LUTBITS)
224 pNode = m_apDecodeLut[Bits & HUFFMAN_LUTMASK];
225
226 // {B} fill with new bits
227 while(Bitcount < 24 && pSrc != pSrcEnd)
228 {
229 Bits |= (*pSrc++) << Bitcount;
230 Bitcount += 8;
231 }
232
233 // {C} load symbol now if we didn't that earlier at location {A}
234 if(!pNode)
235 pNode = m_apDecodeLut[Bits & HUFFMAN_LUTMASK];
236
237 if(!pNode)
238 return -1;
239
240 // {D} check if we hit a symbol already
241 if(pNode->m_NumBits)
242 {
243 // remove the bits for that symbol
244 Bits >>= pNode->m_NumBits;
245 Bitcount -= pNode->m_NumBits;
246 }
247 else
248 {
249 // remove the bits that the lut checked up for us
250 Bits >>= HUFFMAN_LUTBITS;
251 Bitcount -= HUFFMAN_LUTBITS;
252
253 // walk the tree bit by bit
254 while(true)
255 {
256 // traverse tree
257 pNode = &m_aNodes[pNode->m_aLeaves[Bits & 1]];
258
259 // remove bit
260 Bitcount--;
261 Bits >>= 1;
262
263 // check if we hit a symbol
264 if(pNode->m_NumBits)
265 break;
266
267 // no more bits, decoding error
268 if(Bitcount == 0)
269 return -1;
270 }
271 }
272
273 // check for eof
274 if(pNode == pEof)
275 break;
276
277 // output character
278 if(pDst == pDstEnd)
279 return -1;
280 *pDst++ = pNode->m_Symbol;
281 }
282
283 // return the size of the decompressed buffer
284 return (int)(pDst - (const unsigned char *)pOutput);
285}
286