| 1 | /* (c) Magnus Auvinen. See licence.txt in the root of the distribution for more information. */ |
| 2 | /* If you are missing that file, acquire a complete release at teeworlds.com. */ |
| 3 | #include "huffman.h" |
| 4 | |
| 5 | #include <base/system.h> |
| 6 | |
| 7 | #include <algorithm> |
| 8 | |
| 9 | const unsigned CHuffman::ms_aFreqTable[HUFFMAN_MAX_SYMBOLS] = { |
| 10 | 1 << 30, 4545, 2657, 431, 1950, 919, 444, 482, 2244, 617, 838, 542, 715, 1814, 304, 240, 754, 212, 647, 186, |
| 11 | 283, 131, 146, 166, 543, 164, 167, 136, 179, 859, 363, 113, 157, 154, 204, 108, 137, 180, 202, 176, |
| 12 | 872, 404, 168, 134, 151, 111, 113, 109, 120, 126, 129, 100, 41, 20, 16, 22, 18, 18, 17, 19, |
| 13 | 16, 37, 13, 21, 362, 166, 99, 78, 95, 88, 81, 70, 83, 284, 91, 187, 77, 68, 52, 68, |
| 14 | 59, 66, 61, 638, 71, 157, 50, 46, 69, 43, 11, 24, 13, 19, 10, 12, 12, 20, 14, 9, |
| 15 | 20, 20, 10, 10, 15, 15, 12, 12, 7, 19, 15, 14, 13, 18, 35, 19, 17, 14, 8, 5, |
| 16 | 15, 17, 9, 15, 14, 18, 8, 10, 2173, 134, 157, 68, 188, 60, 170, 60, 194, 62, 175, 71, |
| 17 | 148, 67, 167, 78, 211, 67, 156, 69, 1674, 90, 174, 53, 147, 89, 181, 51, 174, 63, 163, 80, |
| 18 | 167, 94, 128, 122, 223, 153, 218, 77, 200, 110, 190, 73, 174, 69, 145, 66, 277, 143, 141, 60, |
| 19 | 136, 53, 180, 57, 142, 57, 158, 61, 166, 112, 152, 92, 26, 22, 21, 28, 20, 26, 30, 21, |
| 20 | 32, 27, 20, 17, 23, 21, 30, 22, 22, 21, 27, 25, 17, 27, 23, 18, 39, 26, 15, 21, |
| 21 | 12, 18, 18, 27, 20, 18, 15, 19, 11, 17, 33, 12, 18, 15, 19, 18, 16, 26, 17, 18, |
| 22 | 9, 10, 25, 22, 22, 17, 20, 16, 6, 16, 15, 20, 14, 18, 24, 335, 1517}; |
| 23 | |
| 24 | struct CHuffmanConstructNode |
| 25 | { |
| 26 | unsigned short m_NodeId; |
| 27 | int m_Frequency; |
| 28 | }; |
| 29 | |
| 30 | static bool CompareNodesByFrequencyDesc(const CHuffmanConstructNode *pNode1, const CHuffmanConstructNode *pNode2) |
| 31 | { |
| 32 | return pNode2->m_Frequency < pNode1->m_Frequency; |
| 33 | } |
| 34 | |
| 35 | void CHuffman::Setbits_r(CNode *pNode, int Bits, unsigned Depth) |
| 36 | { |
| 37 | if(pNode->m_aLeaves[1] != 0xffff) |
| 38 | Setbits_r(pNode: &m_aNodes[pNode->m_aLeaves[1]], Bits: Bits | (1 << Depth), Depth: Depth + 1); |
| 39 | if(pNode->m_aLeaves[0] != 0xffff) |
| 40 | Setbits_r(pNode: &m_aNodes[pNode->m_aLeaves[0]], Bits, Depth: Depth + 1); |
| 41 | |
| 42 | if(pNode->m_NumBits) |
| 43 | { |
| 44 | pNode->m_Bits = Bits; |
| 45 | pNode->m_NumBits = Depth; |
| 46 | } |
| 47 | } |
| 48 | |
| 49 | void CHuffman::ConstructTree(const unsigned *pFrequencies) |
| 50 | { |
| 51 | CHuffmanConstructNode aNodesLeftStorage[HUFFMAN_MAX_SYMBOLS]; |
| 52 | CHuffmanConstructNode *apNodesLeft[HUFFMAN_MAX_SYMBOLS]; |
| 53 | int NumNodesLeft = HUFFMAN_MAX_SYMBOLS; |
| 54 | |
| 55 | // add the symbols |
| 56 | for(int i = 0; i < HUFFMAN_MAX_SYMBOLS; i++) |
| 57 | { |
| 58 | m_aNodes[i].m_NumBits = 0xFFFFFFFF; |
| 59 | m_aNodes[i].m_Symbol = i; |
| 60 | m_aNodes[i].m_aLeaves[0] = 0xffff; |
| 61 | m_aNodes[i].m_aLeaves[1] = 0xffff; |
| 62 | |
| 63 | if(i == HUFFMAN_EOF_SYMBOL) |
| 64 | aNodesLeftStorage[i].m_Frequency = 1; |
| 65 | else |
| 66 | aNodesLeftStorage[i].m_Frequency = pFrequencies[i]; |
| 67 | aNodesLeftStorage[i].m_NodeId = i; |
| 68 | apNodesLeft[i] = &aNodesLeftStorage[i]; |
| 69 | } |
| 70 | |
| 71 | m_NumNodes = HUFFMAN_MAX_SYMBOLS; |
| 72 | |
| 73 | // construct the table |
| 74 | while(NumNodesLeft > 1) |
| 75 | { |
| 76 | std::stable_sort(first: apNodesLeft, last: apNodesLeft + NumNodesLeft, comp: CompareNodesByFrequencyDesc); |
| 77 | |
| 78 | m_aNodes[m_NumNodes].m_NumBits = 0; |
| 79 | m_aNodes[m_NumNodes].m_aLeaves[0] = apNodesLeft[NumNodesLeft - 1]->m_NodeId; |
| 80 | m_aNodes[m_NumNodes].m_aLeaves[1] = apNodesLeft[NumNodesLeft - 2]->m_NodeId; |
| 81 | apNodesLeft[NumNodesLeft - 2]->m_NodeId = m_NumNodes; |
| 82 | apNodesLeft[NumNodesLeft - 2]->m_Frequency = apNodesLeft[NumNodesLeft - 1]->m_Frequency + apNodesLeft[NumNodesLeft - 2]->m_Frequency; |
| 83 | |
| 84 | m_NumNodes++; |
| 85 | NumNodesLeft--; |
| 86 | } |
| 87 | |
| 88 | // set start node |
| 89 | m_pStartNode = &m_aNodes[m_NumNodes - 1]; |
| 90 | |
| 91 | // build symbol bits |
| 92 | Setbits_r(pNode: m_pStartNode, Bits: 0, Depth: 0); |
| 93 | } |
| 94 | |
| 95 | void CHuffman::Init(const unsigned *pFrequencies) |
| 96 | { |
| 97 | // make sure to cleanout every thing |
| 98 | mem_zero(block: m_aNodes, size: sizeof(m_aNodes)); |
| 99 | mem_zero(block: m_apDecodeLut, size: sizeof(m_apDecodeLut)); |
| 100 | m_pStartNode = nullptr; |
| 101 | m_NumNodes = 0; |
| 102 | |
| 103 | // construct the tree |
| 104 | ConstructTree(pFrequencies); |
| 105 | |
| 106 | // build decode LUT |
| 107 | for(int i = 0; i < HUFFMAN_LUTSIZE; i++) |
| 108 | { |
| 109 | unsigned Bits = i; |
| 110 | int k; |
| 111 | CNode *pNode = m_pStartNode; |
| 112 | for(k = 0; k < HUFFMAN_LUTBITS; k++) |
| 113 | { |
| 114 | pNode = &m_aNodes[pNode->m_aLeaves[Bits & 1]]; |
| 115 | Bits >>= 1; |
| 116 | |
| 117 | if(!pNode) |
| 118 | break; |
| 119 | |
| 120 | if(pNode->m_NumBits) |
| 121 | { |
| 122 | m_apDecodeLut[i] = pNode; |
| 123 | break; |
| 124 | } |
| 125 | } |
| 126 | |
| 127 | if(k == HUFFMAN_LUTBITS) |
| 128 | m_apDecodeLut[i] = pNode; |
| 129 | } |
| 130 | } |
| 131 | |
| 132 | //*************************************************************** |
| 133 | int CHuffman::Compress(const void *pInput, int InputSize, void *pOutput, int OutputSize) const |
| 134 | { |
| 135 | // this macro loads a symbol for a byte into bits and bitcount |
| 136 | #define HUFFMAN_MACRO_LOADSYMBOL(Sym) \ |
| 137 | do \ |
| 138 | { \ |
| 139 | Bits |= m_aNodes[Sym].m_Bits << Bitcount; \ |
| 140 | Bitcount += m_aNodes[Sym].m_NumBits; \ |
| 141 | } while(0) |
| 142 | |
| 143 | // this macro writes the symbol stored in bits and bitcount to the dst pointer |
| 144 | #define HUFFMAN_MACRO_WRITE() \ |
| 145 | do \ |
| 146 | { \ |
| 147 | while(Bitcount >= 8) \ |
| 148 | { \ |
| 149 | *pDst++ = (unsigned char)(Bits & 0xff); \ |
| 150 | if(pDst == pDstEnd) \ |
| 151 | return -1; \ |
| 152 | Bits >>= 8; \ |
| 153 | Bitcount -= 8; \ |
| 154 | } \ |
| 155 | } while(0) |
| 156 | |
| 157 | // setup buffer pointers |
| 158 | const unsigned char *pSrc = (const unsigned char *)pInput; |
| 159 | const unsigned char *pSrcEnd = pSrc + InputSize; |
| 160 | unsigned char *pDst = (unsigned char *)pOutput; |
| 161 | unsigned char *pDstEnd = pDst + OutputSize; |
| 162 | |
| 163 | // symbol variables |
| 164 | unsigned Bits = 0; |
| 165 | unsigned Bitcount = 0; |
| 166 | |
| 167 | // make sure that we have data that we want to compress |
| 168 | if(InputSize) |
| 169 | { |
| 170 | // {A} load the first symbol |
| 171 | int Symbol = *pSrc++; |
| 172 | |
| 173 | while(pSrc != pSrcEnd) |
| 174 | { |
| 175 | // {B} load the symbol |
| 176 | HUFFMAN_MACRO_LOADSYMBOL(Symbol); |
| 177 | |
| 178 | // {C} fetch next symbol, this is done here because it will reduce dependency in the code |
| 179 | Symbol = *pSrc++; |
| 180 | |
| 181 | // {B} write the symbol loaded at |
| 182 | HUFFMAN_MACRO_WRITE(); |
| 183 | } |
| 184 | |
| 185 | // write the last symbol loaded from {C} or {A} in the case of only 1 byte input buffer |
| 186 | HUFFMAN_MACRO_LOADSYMBOL(Symbol); |
| 187 | HUFFMAN_MACRO_WRITE(); |
| 188 | } |
| 189 | |
| 190 | // write EOF symbol |
| 191 | HUFFMAN_MACRO_LOADSYMBOL(HUFFMAN_EOF_SYMBOL); |
| 192 | HUFFMAN_MACRO_WRITE(); |
| 193 | |
| 194 | // write out the last bits |
| 195 | *pDst++ = Bits; |
| 196 | |
| 197 | // return the size of the output |
| 198 | return (int)(pDst - (const unsigned char *)pOutput); |
| 199 | |
| 200 | // remove macros |
| 201 | #undef HUFFMAN_MACRO_LOADSYMBOL |
| 202 | #undef HUFFMAN_MACRO_WRITE |
| 203 | } |
| 204 | |
| 205 | //*************************************************************** |
| 206 | int CHuffman::Decompress(const void *pInput, int InputSize, void *pOutput, int OutputSize) const |
| 207 | { |
| 208 | // setup buffer pointers |
| 209 | unsigned char *pDst = (unsigned char *)pOutput; |
| 210 | unsigned char *pSrc = (unsigned char *)pInput; |
| 211 | unsigned char *pDstEnd = pDst + OutputSize; |
| 212 | unsigned char *pSrcEnd = pSrc + InputSize; |
| 213 | |
| 214 | unsigned Bits = 0; |
| 215 | unsigned Bitcount = 0; |
| 216 | |
| 217 | const CNode *pEof = &m_aNodes[HUFFMAN_EOF_SYMBOL]; |
| 218 | |
| 219 | while(true) |
| 220 | { |
| 221 | // {A} try to load a node now, this will reduce dependency at location {D} |
| 222 | const CNode *pNode = nullptr; |
| 223 | if(Bitcount >= HUFFMAN_LUTBITS) |
| 224 | pNode = m_apDecodeLut[Bits & HUFFMAN_LUTMASK]; |
| 225 | |
| 226 | // {B} fill with new bits |
| 227 | while(Bitcount < 24 && pSrc != pSrcEnd) |
| 228 | { |
| 229 | Bits |= (*pSrc++) << Bitcount; |
| 230 | Bitcount += 8; |
| 231 | } |
| 232 | |
| 233 | // {C} load symbol now if we didn't that earlier at location {A} |
| 234 | if(!pNode) |
| 235 | pNode = m_apDecodeLut[Bits & HUFFMAN_LUTMASK]; |
| 236 | |
| 237 | if(!pNode) |
| 238 | return -1; |
| 239 | |
| 240 | // {D} check if we hit a symbol already |
| 241 | if(pNode->m_NumBits) |
| 242 | { |
| 243 | // remove the bits for that symbol |
| 244 | Bits >>= pNode->m_NumBits; |
| 245 | Bitcount -= pNode->m_NumBits; |
| 246 | } |
| 247 | else |
| 248 | { |
| 249 | // remove the bits that the lut checked up for us |
| 250 | Bits >>= HUFFMAN_LUTBITS; |
| 251 | Bitcount -= HUFFMAN_LUTBITS; |
| 252 | |
| 253 | // walk the tree bit by bit |
| 254 | while(true) |
| 255 | { |
| 256 | // traverse tree |
| 257 | pNode = &m_aNodes[pNode->m_aLeaves[Bits & 1]]; |
| 258 | |
| 259 | // remove bit |
| 260 | Bitcount--; |
| 261 | Bits >>= 1; |
| 262 | |
| 263 | // check if we hit a symbol |
| 264 | if(pNode->m_NumBits) |
| 265 | break; |
| 266 | |
| 267 | // no more bits, decoding error |
| 268 | if(Bitcount == 0) |
| 269 | return -1; |
| 270 | } |
| 271 | } |
| 272 | |
| 273 | // check for eof |
| 274 | if(pNode == pEof) |
| 275 | break; |
| 276 | |
| 277 | // output character |
| 278 | if(pDst == pDstEnd) |
| 279 | return -1; |
| 280 | *pDst++ = pNode->m_Symbol; |
| 281 | } |
| 282 | |
| 283 | // return the size of the decompressed buffer |
| 284 | return (int)(pDst - (const unsigned char *)pOutput); |
| 285 | } |
| 286 | |