opencv/3rdparty/openexr/IlmImf/ImfFastHuf.cpp

///////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2009-2014 DreamWorks Animation LLC. 
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
// *       Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// *       Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// *       Neither the name of DreamWorks Animation nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////

#include "ImfFastHuf.h"
#include <Iex.h>

#include <string.h>
#include <assert.h>
#include <math.h>
#include <vector>

OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER

//
// Adapted from hufUnpackEncTable - 
// We don't need to reconstruct the code book, just the encoded
// lengths for each symbol. From the lengths, we can build the
// base + offset tables. This should be a bit more efficient
// for sparse code books.
// 
//   table     - ptr to the start of the code length data. Will be
//               updated as we decode data
//
//   numBytes  - size of the encoded table (I think)?
//
//   minSymbol - smallest symbol in the code book
//
//   maxSymbol - largest symbol in the code book. 
//
//   rleSymbol - the symbol to trigger RLE in the encoded bitstream
//

FastHufDecoder::FastHufDecoder
    (const char *&table,
     int numBytes,
     int minSymbol,
     int maxSymbol,
     int rleSymbol)
:
    _rleSymbol (rleSymbol),
    _numSymbols (0),
    _minCodeLength (255),
    _maxCodeLength (0),
    _idToSymbol (0)
{
    //
    // List of symbols that we find with non-zero code lengths
    // (listed in the order we find them). Store these in the
    // same format as the code book stores codes + lengths - 
    // low 6 bits are the length, everything above that is
    // the symbol.
    //

    std::vector<Int64> symbols;

    //
    // The 'base' table is the minimum code at each code length. base[i]
    // is the smallest code (numerically) of length i.
    //

    Int64 base[MAX_CODE_LEN + 1];     

    //
    // The 'offset' table is the position (in sorted order) of the first id
    // of a given code lenght. Array is indexed by code length, like base.  
    //

    Int64 offset[MAX_CODE_LEN + 1];   

    //
    // Count of how many codes at each length there are. Array is 
    // indexed by code length, like base and offset.
    //

    size_t codeCount[MAX_CODE_LEN + 1];    

    for (int i = 0; i <= MAX_CODE_LEN; ++i)
    {
        codeCount[i] = 0;
        base[i]      = 0xffffffffffffffffULL;
        offset[i]    = 0;
    }

    //
    // Count the number of codes, the min/max code lengths, the number of
    // codes with each length, and record symbols with non-zero code
    // length as we find them.
    //

    const char *currByte     = table;
    Int64       currBits     = 0;
    int         currBitCount = 0;

    const int SHORT_ZEROCODE_RUN = 59;
    const int LONG_ZEROCODE_RUN  = 63;
    const int SHORTEST_LONG_RUN  = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;

    for (Int64 symbol = minSymbol; symbol <= maxSymbol; symbol++)
    {
        if (currByte - table > numBytes)
        {
            throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "
                                           "(Truncated table data).");
        }

        //
        // Next code length - either:
        //       0-58  (literal code length)
        //       59-62 (various lengths runs of 0)
        //       63    (run of n 0's, with n is the next 8 bits)
        //

        Int64 codeLen = readBits (6, currBits, currBitCount, currByte);

        if (codeLen == (Int64) LONG_ZEROCODE_RUN)
        {
            if (currByte - table > numBytes)
            {
                throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "
                                               "(Truncated table data).");
            }

            int runLen = readBits (8, currBits, currBitCount, currByte) +
                         SHORTEST_LONG_RUN;

            if (symbol + runLen > maxSymbol + 1)
            {
                throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "
                                               "(Run beyond end of table).");
            }
            
            symbol += runLen - 1;

        }
        else if (codeLen >= (Int64) SHORT_ZEROCODE_RUN)
        {
            int runLen = codeLen - SHORT_ZEROCODE_RUN + 2;

            if (symbol + runLen > maxSymbol + 1)
            {
                throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "
                                               "(Run beyond end of table).");
            }

            symbol += runLen - 1;

        }
        else if (codeLen != 0)
        {
            symbols.push_back ((symbol << 6) | (codeLen & 63));

            if (codeLen < _minCodeLength)
                _minCodeLength = codeLen;

            if (codeLen > _maxCodeLength)
                _maxCodeLength = codeLen;

            codeCount[codeLen]++;
        }
    }

    for (int i = 0; i < MAX_CODE_LEN; ++i)
        _numSymbols += codeCount[i];

    table = currByte;

    //
    // Compute base - once we have the code length counts, there
    //                is a closed form solution for this
    //

    {
        double* countTmp = new double[_maxCodeLength+1];

        for (int l = _minCodeLength; l <= _maxCodeLength; ++l)
        {
            countTmp[l] = (double)codeCount[l] * 
                          (double)(2 << (_maxCodeLength-l));
        }
    
        for (int l = _minCodeLength; l <= _maxCodeLength; ++l)
        {
            double tmp = 0;

            for (int k =l + 1; k <= _maxCodeLength; ++k)
                tmp += countTmp[k];
            
            tmp /= (double)(2 << (_maxCodeLength - l));

            base[l] = (Int64)ceil (tmp);
        }

        delete [] countTmp;
    }
   
    //
    // Compute offset - these are the positions of the first
    //                  id (not symbol) that has length [i]
    //

    offset[_maxCodeLength] = 0;

    for (int i= _maxCodeLength - 1; i >= _minCodeLength; i--)
        offset[i] = offset[i + 1] + codeCount[i + 1];

    //
    // Allocate and fill the symbol-to-id mapping. Smaller Ids should be
    // mapped to less-frequent symbols (which have longer codes). Use
    // the offset table to tell us where the id's for a given code 
    // length start off.
    //

    _idToSymbol = new int[_numSymbols];

    Int64 mapping[MAX_CODE_LEN + 1];
    for (int i = 0; i < MAX_CODE_LEN + 1; ++i) 
        mapping[i] = -1;
    for (int i = _minCodeLength; i <= _maxCodeLength; ++i)
        mapping[i] = offset[i];

    for (std::vector<Int64>::const_iterator i = symbols.begin(); 
         i != symbols.end();
         ++i)
    {
        int codeLen = *i & 63;
        int symbol  = *i >> 6;

        if (mapping[codeLen] >= _numSymbols)
            throw IEX_NAMESPACE::InputExc ("Huffman decode error "
                                           "(Invalid symbol in header).");
        
        _idToSymbol[mapping[codeLen]] = symbol;
        mapping[codeLen]++;
    }

    buildTables(base, offset);
}


FastHufDecoder::~FastHufDecoder()
{
    delete[] _idToSymbol;
}


//
// Static check if the decoder is enabled.
//
// ATM, I only have access to little endian hardware for testing,
// so I'm not entirely sure that we are reading fom the bit stream
// properly on BE. 
//
// If you happen to have more obscure hardware, check that the 
// byte swapping in refill() is happening sensable, add an endian 
// check if needed, and fix the preprocessor magic here.
//

#define READ64(c) \
    ((Int64)(c)[0] << 56) | ((Int64)(c)[1] << 48) | ((Int64)(c)[2] << 40) | \
    ((Int64)(c)[3] << 32) | ((Int64)(c)[4] << 24) | ((Int64)(c)[5] << 16) | \
    ((Int64)(c)[6] <<  8) | ((Int64)(c)[7] ) 

#ifdef __INTEL_COMPILER // ICC built-in swap for LE hosts
    #if defined (__i386__) || defined(__x86_64__)
        #undef  READ64
        #define READ64(c) _bswap64 (*(const Int64*)(c))
    #endif
#endif


bool
FastHufDecoder::enabled()
{
    #if defined(__INTEL_COMPILER) || defined(__GNUC__)  

        //
        // Enabled for ICC, GCC:
        //       __i386__   -> x86
        //       __x86_64__ -> 64-bit x86
        //

        #if defined (__i386__) || defined(__x86_64__)
            return true;
        #else
            return false;
        #endif

    #elif defined (_MSC_VER)

        //
        // Enabled for Visual Studio:
        //        _M_IX86 -> x86
        //        _M_X64  -> 64bit x86

        #if defined (_M_IX86) || defined(_M_X64)
            return true;
        #else
            return false;
        #endif

    #else

        //
        // Unknown compiler - Be safe and disable.
        //
        return false;
    #endif
}

//
//
// Built the acceleration tables for lookups on the upper bits
// as well as the 'LJ' tables.
//

void
FastHufDecoder::buildTables (Int64 *base, Int64 *offset)
{
    //
    // Build the 'left justified' base table, by shifting base left..
    //

    for (int i = 0; i <= MAX_CODE_LEN; ++i)
    {
        if (base[i] != 0xffffffffffffffffULL)
        {
            _ljBase[i] = base[i] << (64 - i);
        }
        else
        {
            //
            // Unused code length - insert dummy values
            //

            _ljBase[i] = 0xffffffffffffffffULL;
        }
    }

    //
    // Combine some terms into a big fat constant, which for
    // lack of a better term we'll call the 'left justified' 
    // offset table (because it serves the same function
    // as 'offset', when using the left justified base table.
    //

    for (int i = 0; i <= MAX_CODE_LEN; ++i)
        _ljOffset[i] = offset[i] - (_ljBase[i] >> (64 - i));

    //
    // Build the acceleration tables for the lookups of
    // short codes ( <= TABLE_LOOKUP_BITS long)
    //

    for (Int64 i = 0; i < 1 << TABLE_LOOKUP_BITS; ++i)
    {
        Int64 value = i << (64 - TABLE_LOOKUP_BITS);

        _tableSymbol[i]  = 0xffff;
        _tableCodeLen[i] = 0; 

        for (int codeLen = _minCodeLength; codeLen <= _maxCodeLength; ++codeLen)
        {
            if (_ljBase[codeLen] <= value)
            {
                _tableCodeLen[i] = codeLen;

                Int64 id = _ljOffset[codeLen] + (value >> (64 - codeLen));
                if (id < _numSymbols)
                {
                    _tableSymbol[i] = _idToSymbol[id];
                }
                else
                {
                    throw IEX_NAMESPACE::InputExc ("Huffman decode error "
                                                   "(Overrun).");
                }
                break;
            }
        }
    }

    //
    // Store the smallest value in the table that points to real data.
    // This should be the entry for the largest length that has 
    // valid data (in our case, non-dummy _ljBase)
    //

    int minIdx = TABLE_LOOKUP_BITS;

    while (minIdx > 0 && _ljBase[minIdx] == 0xffffffffffffffffULL)
        minIdx--;

    if (minIdx < 0)
    {
        //
        // Error, no codes with lengths 0-TABLE_LOOKUP_BITS used.
        // Set the min value such that the table is never tested.
        //

        _tableMin = 0xffffffffffffffffULL;
    }
    else
    {
        _tableMin = _ljBase[minIdx];
    }
}


// 
// For decoding, we're holding onto 2 Int64's. 
//
// The first (buffer), holds the next bits from the bitstream to be 
// decoded. For certain paths in the decoder, we only need TABLE_LOOKUP_BITS
// valid bits to decode the next symbol. For other paths, we need a full
// 64-bits to decode a symbol. 
//
// When we need to refill 'buffer', we could pull bits straight from 
// the bitstream. But this is very slow and requires lots of book keeping
// (what's the next bit in the next byte?). Instead, we keep another Int64
// around that we use to refill from. While this doesn't cut down on the
// book keeping (still need to know how many valid bits), it does cut
// down on some of the bit shifting crazy and byte access. 
//
// The refill Int64 (bufferBack) gets left-shifted after we've pulled
// off bits. If we run out of bits in the input bit stream, we just
// shift in 0's to bufferBack. 
//
// The refill act takes numBits from the top of bufferBack and sticks
// them in the bottom of buffer. If there arn't enough bits in bufferBack,
// it gets refilled (to 64-bits) from the input bitstream.
//

inline void
FastHufDecoder::refill
    (Int64 &buffer,
     int numBits,                       // number of bits to refill
     Int64 &bufferBack,                 // the next 64-bits, to refill from
     int &bufferBackNumBits,            // number of bits left in bufferBack
     const unsigned char *&currByte,    // current byte in the bitstream
     int &currBitsLeft)                 // number of bits left in the bitsream
{
    // 
    // Refill bits into the bottom of buffer, from the top of bufferBack.
    // Always top up buffer to be completely full.
    //

    buffer |= bufferBack >> (64 - numBits);

    if (bufferBackNumBits < numBits)
    {
        numBits -= bufferBackNumBits;

        // 
        // Refill all of bufferBack from the bitstream. Either grab
        // a full 64-bit chunk, or whatever bytes are left. If we
        // don't have 64-bits left, pad with 0's.
        //

        if (currBitsLeft >= 64)
        {
            bufferBack        = READ64 (currByte); 
            bufferBackNumBits = 64;
            currByte         += sizeof (Int64);
            currBitsLeft     -= 8 * sizeof (Int64);

        }
        else
        {
            bufferBack        = 0;
            bufferBackNumBits = 64; 

            Int64 shift = 56;
            
            while (currBitsLeft > 0)
            {
                bufferBack |= ((Int64)(*currByte)) << shift;

                currByte++;
                shift        -= 8;
                currBitsLeft -= 8;
            }

            //
            // At this point, currBitsLeft might be negative, just because
            // we're subtracting whole bytes. To keep anyone from freaking
            // out, zero the counter.
            //

            if (currBitsLeft < 0)
                currBitsLeft = 0;
        }

        buffer |= bufferBack >> (64 - numBits);
    }
    
    bufferBack         = bufferBack << numBits;
    bufferBackNumBits -= numBits;

    // 
    // We can have cases where the previous shift of bufferBack is << 64 - 
    // in which case no shift occurs. The bit count math still works though,
    // so if we don't have any bits left, zero out bufferBack.
    //

    if (bufferBackNumBits == 0)
        bufferBack = 0;
}

//
// Read the next few bits out of a bitstream. Will be given a backing buffer
// (buffer) that may still have data left over from previous reads
// (bufferNumBits).  Bitstream pointer (currByte) will be advanced when needed.
//

inline Int64 
FastHufDecoder::readBits
    (int numBits,
     Int64 &buffer,             // c
     int &bufferNumBits,        // lc
     const char *&currByte)     // in
{
    while (bufferNumBits < numBits)
    {
        buffer = (buffer << 8) | *(unsigned char*)(currByte++);
        bufferNumBits += 8;
    }

    bufferNumBits -= numBits;
    return (buffer >> bufferNumBits) & ((1 << numBits) - 1);
}


//
// Decode using a the 'One-Shift' strategy for decoding, with a 
// small-ish table to accelerate decoding of short codes.
//
// If possible, try looking up codes into the acceleration table.
// This has a few benifits - there's no search involved; We don't
// need an additional lookup to map id to symbol; we don't need
// a full 64-bits (so less refilling). 
//

void
FastHufDecoder::decode
    (const unsigned char *src,
     int numSrcBits,
     unsigned short *dst, 
     int numDstElems)
{
    if (numSrcBits < 128)
        throw IEX_NAMESPACE::InputExc ("Error choosing Huffman decoder implementation "
                                       "(insufficient number of bits).");

    //
    // Current position (byte/bit) in the src data stream
    // (after the first buffer fill)
    //

    const unsigned char *currByte = src + 2 * sizeof (Int64);

    numSrcBits -= 8 * 2 * sizeof (Int64);

    //
    // 64-bit buffer holding the current bits in the stream
    //

    Int64 buffer            = READ64 (src); 
    int   bufferNumBits     = 64;

    //
    // 64-bit buffer holding the next bits in the stream
    //

    Int64 bufferBack        = READ64 ((src + sizeof (Int64))); 
    int   bufferBackNumBits = 64;

    int dstIdx = 0;

    while (dstIdx < numDstElems)
    {
        int  codeLen;
        int  symbol;

        //
        // Test if we can be table accelerated. If so, directly
        // lookup the output symbol. Otherwise, we need to fall
        // back to searching for the code.
        //
        // If we're doing table lookups, we don't really need
        // a re-filled buffer, so long as we have TABLE_LOOKUP_BITS
        // left. But for a search, we do need a refilled table.
        //

        if (_tableMin <= buffer)
        {
            int tableIdx = buffer >> (64 - TABLE_LOOKUP_BITS);

            // 
            // For invalid codes, _tableCodeLen[] should return 0. This
            // will cause the decoder to get stuck in the current spot
            // until we run out of elements, then barf that the codestream
            // is bad.  So we don't need to stick a condition like
            //     if (codeLen > _maxCodeLength) in this inner.
            //

            codeLen = _tableCodeLen[tableIdx];
            symbol  = _tableSymbol[tableIdx];
        }
        else
        {
            if (bufferNumBits < 64)
            {
                refill (buffer,
                        64 - bufferNumBits,
                        bufferBack,
                        bufferBackNumBits,
                        currByte,
                        numSrcBits);

                bufferNumBits = 64;
            }

            // 
            // Brute force search: 
            // Find the smallest length where _ljBase[length] <= buffer
            //

            codeLen = TABLE_LOOKUP_BITS + 1;

            while (_ljBase[codeLen] > buffer && codeLen <= _maxCodeLength)
                codeLen++;

            if (codeLen > _maxCodeLength)
            {
                throw IEX_NAMESPACE::InputExc ("Huffman decode error "
                                               "(Decoded an invalid symbol).");
            }

            Int64 id = _ljOffset[codeLen] + (buffer >> (64 - codeLen));
            if (id < _numSymbols)
            {
                symbol = _idToSymbol[id];
            }
            else
            {
                throw IEX_NAMESPACE::InputExc ("Huffman decode error "
                                               "(Decoded an invalid symbol).");
            }
        }

        //
        // Shift over bit stream, and update the bit count in the buffer
        //

        buffer = buffer << codeLen;
        bufferNumBits -= codeLen;

        //
        // If we recieved a RLE symbol (_rleSymbol), then we need
        // to read ahead 8 bits to know how many times to repeat
        // the previous symbol. Need to ensure we at least have
        // 8 bits of data in the buffer
        //

        if (symbol == _rleSymbol)
        {
            if (bufferNumBits < 8)
            {
                refill (buffer,
                        64 - bufferNumBits,
                        bufferBack,
                        bufferBackNumBits,
                        currByte,
                        numSrcBits);

                bufferNumBits = 64;
            }

            int rleCount = buffer >> 56;

            if (dstIdx < 1)
            {
                throw IEX_NAMESPACE::InputExc ("Huffman decode error (RLE code "
                                               "with no previous symbol).");
            }

            if (dstIdx + rleCount > numDstElems)
            {
                throw IEX_NAMESPACE::InputExc ("Huffman decode error (Symbol run "
                                               "beyond expected output buffer length).");
            }

            if (rleCount <= 0) 
            {
                throw IEX_NAMESPACE::InputExc("Huffman decode error"
                                              " (Invalid RLE length)");
            }

            for (int i = 0; i < rleCount; ++i)
                dst[dstIdx + i] = dst[dstIdx - 1];

            dstIdx += rleCount;

            buffer = buffer << 8;
            bufferNumBits -= 8;
        }
        else
        {
            dst[dstIdx] = symbol;
            dstIdx++;
        }

        //
        // refill bit stream buffer if we're below the number of 
        // bits needed for a table lookup
        //

        if (bufferNumBits < TABLE_LOOKUP_BITS)
        {
            refill (buffer,
                    64 - bufferNumBits,
                    bufferBack,
                    bufferBackNumBits,
                    currByte,
                    numSrcBits);

            bufferNumBits = 64;
        }
    }

    if (numSrcBits != 0)
    {
        throw IEX_NAMESPACE::InputExc ("Huffman decode error (Compressed data remains "
                                       "after filling expected output buffer).");
    }
}

OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT
Merge pull request #14725 from alalek:update_openexr_2.3.0 3rdparty: update OpenEXR 2.3.0 (#14725) * openexr 2.2.1 * openexr 2.3.0 * openexr: build fixes * openexr: build dwa tables on-demand 2019-06-11 01:04:23 +08:00			`///////////////////////////////////////////////////////////////////////////`
			`//`
			`// Copyright (c) 2009-2014 DreamWorks Animation LLC.`
			`//`
			`// All rights reserved.`
			`//`
			`// Redistribution and use in source and binary forms, with or without`
			`// modification, are permitted provided that the following conditions are`
			`// met:`
			`// * Redistributions of source code must retain the above copyright`
			`// notice, this list of conditions and the following disclaimer.`
			`// * Redistributions in binary form must reproduce the above`
			`// copyright notice, this list of conditions and the following disclaimer`
			`// in the documentation and/or other materials provided with the`
			`// distribution.`
			`// * Neither the name of DreamWorks Animation nor the names of`
			`// its contributors may be used to endorse or promote products derived`
			`// from this software without specific prior written permission.`
			`//`
			`// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS`
			`// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT`
			`// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR`
			`// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT`
			`// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,`
			`// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT`
			`// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,`
			`// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY`
			`// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT`
			`// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE`
			`// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
			`//`
			`///////////////////////////////////////////////////////////////////////////`

			`#include "ImfFastHuf.h"`
			`#include <Iex.h>`

			`#include <string.h>`
			`#include <assert.h>`
			`#include <math.h>`
			`#include <vector>`

			`OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER`

			`//`
			`// Adapted from hufUnpackEncTable -`
			`// We don't need to reconstruct the code book, just the encoded`
			`// lengths for each symbol. From the lengths, we can build the`
			`// base + offset tables. This should be a bit more efficient`
			`// for sparse code books.`
			`//`
			`// table - ptr to the start of the code length data. Will be`
			`// updated as we decode data`
			`//`
			`// numBytes - size of the encoded table (I think)?`
			`//`
			`// minSymbol - smallest symbol in the code book`
			`//`
			`// maxSymbol - largest symbol in the code book.`
			`//`
			`// rleSymbol - the symbol to trigger RLE in the encoded bitstream`
			`//`

			`FastHufDecoder::FastHufDecoder`
			`(const char *&table,`
			`int numBytes,`
			`int minSymbol,`
			`int maxSymbol,`
			`int rleSymbol)`
			`:`
			`_rleSymbol (rleSymbol),`
			`_numSymbols (0),`
			`_minCodeLength (255),`
			`_maxCodeLength (0),`
			`_idToSymbol (0)`
			`{`
			`//`
			`// List of symbols that we find with non-zero code lengths`
			`// (listed in the order we find them). Store these in the`
			`// same format as the code book stores codes + lengths -`
			`// low 6 bits are the length, everything above that is`
			`// the symbol.`
			`//`

			`std::vector<Int64> symbols;`

			`//`
			`// The 'base' table is the minimum code at each code length. base[i]`
			`// is the smallest code (numerically) of length i.`
			`//`

			`Int64 base[MAX_CODE_LEN + 1];`

			`//`
			`// The 'offset' table is the position (in sorted order) of the first id`
			`// of a given code lenght. Array is indexed by code length, like base.`
			`//`

			`Int64 offset[MAX_CODE_LEN + 1];`

			`//`
			`// Count of how many codes at each length there are. Array is`
			`// indexed by code length, like base and offset.`
			`//`

			`size_t codeCount[MAX_CODE_LEN + 1];`

			`for (int i = 0; i <= MAX_CODE_LEN; ++i)`
			`{`
			`codeCount[i] = 0;`
			`base[i] = 0xffffffffffffffffULL;`
			`offset[i] = 0;`
			`}`

			`//`
			`// Count the number of codes, the min/max code lengths, the number of`
			`// codes with each length, and record symbols with non-zero code`
			`// length as we find them.`
			`//`

			`const char *currByte = table;`
			`Int64 currBits = 0;`
			`int currBitCount = 0;`

			`const int SHORT_ZEROCODE_RUN = 59;`
			`const int LONG_ZEROCODE_RUN = 63;`
			`const int SHORTEST_LONG_RUN = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;`

			`for (Int64 symbol = minSymbol; symbol <= maxSymbol; symbol++)`
			`{`
			`if (currByte - table > numBytes)`
			`{`
			`throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "`
			`"(Truncated table data).");`
			`}`

			`//`
			`// Next code length - either:`
			`// 0-58 (literal code length)`
			`// 59-62 (various lengths runs of 0)`
			`// 63 (run of n 0's, with n is the next 8 bits)`
			`//`

			`Int64 codeLen = readBits (6, currBits, currBitCount, currByte);`

			`if (codeLen == (Int64) LONG_ZEROCODE_RUN)`
			`{`
			`if (currByte - table > numBytes)`
			`{`
			`throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "`
			`"(Truncated table data).");`
			`}`

			`int runLen = readBits (8, currBits, currBitCount, currByte) +`
			`SHORTEST_LONG_RUN;`

			`if (symbol + runLen > maxSymbol + 1)`
			`{`
			`throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "`
			`"(Run beyond end of table).");`
			`}`

			`symbol += runLen - 1;`

			`}`
			`else if (codeLen >= (Int64) SHORT_ZEROCODE_RUN)`
			`{`
			`int runLen = codeLen - SHORT_ZEROCODE_RUN + 2;`

			`if (symbol + runLen > maxSymbol + 1)`
			`{`
			`throw IEX_NAMESPACE::InputExc ("Error decoding Huffman table "`
			`"(Run beyond end of table).");`
			`}`

			`symbol += runLen - 1;`

			`}`
			`else if (codeLen != 0)`
			`{`
			`symbols.push_back ((symbol << 6) \| (codeLen & 63));`

			`if (codeLen < _minCodeLength)`
			`_minCodeLength = codeLen;`

			`if (codeLen > _maxCodeLength)`
			`_maxCodeLength = codeLen;`

			`codeCount[codeLen]++;`
			`}`
			`}`

			`for (int i = 0; i < MAX_CODE_LEN; ++i)`
			`_numSymbols += codeCount[i];`

			`table = currByte;`

			`//`
			`// Compute base - once we have the code length counts, there`
			`// is a closed form solution for this`
			`//`

			`{`
			`double* countTmp = new double[_maxCodeLength+1];`

			`for (int l = _minCodeLength; l <= _maxCodeLength; ++l)`
			`{`
			`countTmp[l] = (double)codeCount[l] *`
			`(double)(2 << (_maxCodeLength-l));`
			`}`

			`for (int l = _minCodeLength; l <= _maxCodeLength; ++l)`
			`{`
			`double tmp = 0;`

			`for (int k =l + 1; k <= _maxCodeLength; ++k)`
			`tmp += countTmp[k];`

			`tmp /= (double)(2 << (_maxCodeLength - l));`

			`base[l] = (Int64)ceil (tmp);`
			`}`

			`delete [] countTmp;`
			`}`

			`//`
			`// Compute offset - these are the positions of the first`
			`// id (not symbol) that has length [i]`
			`//`

			`offset[_maxCodeLength] = 0;`

			`for (int i= _maxCodeLength - 1; i >= _minCodeLength; i--)`
			`offset[i] = offset[i + 1] + codeCount[i + 1];`

			`//`
			`// Allocate and fill the symbol-to-id mapping. Smaller Ids should be`
			`// mapped to less-frequent symbols (which have longer codes). Use`
			`// the offset table to tell us where the id's for a given code`
			`// length start off.`
			`//`

			`_idToSymbol = new int[_numSymbols];`

			`Int64 mapping[MAX_CODE_LEN + 1];`
			`for (int i = 0; i < MAX_CODE_LEN + 1; ++i)`
			`mapping[i] = -1;`
			`for (int i = _minCodeLength; i <= _maxCodeLength; ++i)`
			`mapping[i] = offset[i];`

			`for (std::vector<Int64>::const_iterator i = symbols.begin();`
			`i != symbols.end();`
			`++i)`
			`{`
			`int codeLen = *i & 63;`
			`int symbol = *i >> 6;`

			`if (mapping[codeLen] >= _numSymbols)`
			`throw IEX_NAMESPACE::InputExc ("Huffman decode error "`
			`"(Invalid symbol in header).");`

			`_idToSymbol[mapping[codeLen]] = symbol;`
			`mapping[codeLen]++;`
			`}`

			`buildTables(base, offset);`
			`}`


			`FastHufDecoder::~FastHufDecoder()`
			`{`
			`delete[] _idToSymbol;`
			`}`


			`//`
			`// Static check if the decoder is enabled.`
			`//`
			`// ATM, I only have access to little endian hardware for testing,`
			`// so I'm not entirely sure that we are reading fom the bit stream`
			`// properly on BE.`
			`//`
			`// If you happen to have more obscure hardware, check that the`
			`// byte swapping in refill() is happening sensable, add an endian`
			`// check if needed, and fix the preprocessor magic here.`
			`//`

			`#define READ64(c) \`
			`((Int64)(c)[0] << 56) \| ((Int64)(c)[1] << 48) \| ((Int64)(c)[2] << 40) \| \`
			`((Int64)(c)[3] << 32) \| ((Int64)(c)[4] << 24) \| ((Int64)(c)[5] << 16) \| \`
			`((Int64)(c)[6] << 8) \| ((Int64)(c)[7] )`

			`#ifdef __INTEL_COMPILER // ICC built-in swap for LE hosts`
			`#if defined (__i386__) \|\| defined(__x86_64__)`
			`#undef READ64`
			`#define READ64(c) _bswap64 ((const Int64)(c))`
			`#endif`
			`#endif`


			`bool`
			`FastHufDecoder::enabled()`
			`{`
			`#if defined(__INTEL_COMPILER) \|\| defined(__GNUC__)`

			`//`
			`// Enabled for ICC, GCC:`
			`// __i386__ -> x86`
			`// __x86_64__ -> 64-bit x86`
			`//`

			`#if defined (__i386__) \|\| defined(__x86_64__)`
			`return true;`
			`#else`
			`return false;`
			`#endif`

			`#elif defined (_MSC_VER)`

			`//`
			`// Enabled for Visual Studio:`
			`// _M_IX86 -> x86`
			`// _M_X64 -> 64bit x86`

			`#if defined (_M_IX86) \|\| defined(_M_X64)`
			`return true;`
			`#else`
			`return false;`
			`#endif`

			`#else`

			`//`
			`// Unknown compiler - Be safe and disable.`
			`//`
			`return false;`
			`#endif`
			`}`

			`//`
			`//`
			`// Built the acceleration tables for lookups on the upper bits`
			`// as well as the 'LJ' tables.`
			`//`

			`void`
			`FastHufDecoder::buildTables (Int64 base, Int64 offset)`
			`{`
			`//`
			`// Build the 'left justified' base table, by shifting base left..`
			`//`

			`for (int i = 0; i <= MAX_CODE_LEN; ++i)`
			`{`
			`if (base[i] != 0xffffffffffffffffULL)`
			`{`
			`_ljBase[i] = base[i] << (64 - i);`
			`}`
			`else`
			`{`
			`//`
			`// Unused code length - insert dummy values`
			`//`

			`_ljBase[i] = 0xffffffffffffffffULL;`
			`}`
			`}`

			`//`
			`// Combine some terms into a big fat constant, which for`
			`// lack of a better term we'll call the 'left justified'`
			`// offset table (because it serves the same function`
			`// as 'offset', when using the left justified base table.`
			`//`

			`for (int i = 0; i <= MAX_CODE_LEN; ++i)`
			`_ljOffset[i] = offset[i] - (_ljBase[i] >> (64 - i));`

			`//`
			`// Build the acceleration tables for the lookups of`
			`// short codes ( <= TABLE_LOOKUP_BITS long)`
			`//`

			`for (Int64 i = 0; i < 1 << TABLE_LOOKUP_BITS; ++i)`
			`{`
			`Int64 value = i << (64 - TABLE_LOOKUP_BITS);`

			`_tableSymbol[i] = 0xffff;`
			`_tableCodeLen[i] = 0;`

			`for (int codeLen = _minCodeLength; codeLen <= _maxCodeLength; ++codeLen)`
			`{`
			`if (_ljBase[codeLen] <= value)`
			`{`
			`_tableCodeLen[i] = codeLen;`

			`Int64 id = _ljOffset[codeLen] + (value >> (64 - codeLen));`
			`if (id < _numSymbols)`
			`{`
			`_tableSymbol[i] = _idToSymbol[id];`
			`}`
			`else`
			`{`
			`throw IEX_NAMESPACE::InputExc ("Huffman decode error "`
			`"(Overrun).");`
			`}`
			`break;`
			`}`
			`}`
			`}`

			`//`
			`// Store the smallest value in the table that points to real data.`
			`// This should be the entry for the largest length that has`
			`// valid data (in our case, non-dummy _ljBase)`
			`//`

			`int minIdx = TABLE_LOOKUP_BITS;`

			`while (minIdx > 0 && _ljBase[minIdx] == 0xffffffffffffffffULL)`
			`minIdx--;`

			`if (minIdx < 0)`
			`{`
			`//`
			`// Error, no codes with lengths 0-TABLE_LOOKUP_BITS used.`
			`// Set the min value such that the table is never tested.`
			`//`

			`_tableMin = 0xffffffffffffffffULL;`
			`}`
			`else`
			`{`
			`_tableMin = _ljBase[minIdx];`
			`}`
			`}`


			`//`
			`// For decoding, we're holding onto 2 Int64's.`
			`//`
			`// The first (buffer), holds the next bits from the bitstream to be`
			`// decoded. For certain paths in the decoder, we only need TABLE_LOOKUP_BITS`
			`// valid bits to decode the next symbol. For other paths, we need a full`
			`// 64-bits to decode a symbol.`
			`//`
			`// When we need to refill 'buffer', we could pull bits straight from`
			`// the bitstream. But this is very slow and requires lots of book keeping`
			`// (what's the next bit in the next byte?). Instead, we keep another Int64`
			`// around that we use to refill from. While this doesn't cut down on the`
			`// book keeping (still need to know how many valid bits), it does cut`
			`// down on some of the bit shifting crazy and byte access.`
			`//`
			`// The refill Int64 (bufferBack) gets left-shifted after we've pulled`
			`// off bits. If we run out of bits in the input bit stream, we just`
			`// shift in 0's to bufferBack.`
			`//`
			`// The refill act takes numBits from the top of bufferBack and sticks`
			`// them in the bottom of buffer. If there arn't enough bits in bufferBack,`
			`// it gets refilled (to 64-bits) from the input bitstream.`
			`//`

			`inline void`
			`FastHufDecoder::refill`
			`(Int64 &buffer,`
			`int numBits, // number of bits to refill`
			`Int64 &bufferBack, // the next 64-bits, to refill from`
			`int &bufferBackNumBits, // number of bits left in bufferBack`
			`const unsigned char *&currByte, // current byte in the bitstream`
			`int &currBitsLeft) // number of bits left in the bitsream`
			`{`
			`//`
			`// Refill bits into the bottom of buffer, from the top of bufferBack.`
			`// Always top up buffer to be completely full.`
			`//`

			`buffer \|= bufferBack >> (64 - numBits);`

			`if (bufferBackNumBits < numBits)`
			`{`
			`numBits -= bufferBackNumBits;`

			`//`
			`// Refill all of bufferBack from the bitstream. Either grab`
			`// a full 64-bit chunk, or whatever bytes are left. If we`
			`// don't have 64-bits left, pad with 0's.`
			`//`

			`if (currBitsLeft >= 64)`
			`{`
			`bufferBack = READ64 (currByte);`
			`bufferBackNumBits = 64;`
			`currByte += sizeof (Int64);`
			`currBitsLeft -= 8 * sizeof (Int64);`

			`}`
			`else`
			`{`
			`bufferBack = 0;`
			`bufferBackNumBits = 64;`

			`Int64 shift = 56;`

			`while (currBitsLeft > 0)`
			`{`
			`bufferBack \|= ((Int64)(*currByte)) << shift;`

			`currByte++;`
			`shift -= 8;`
			`currBitsLeft -= 8;`
			`}`

			`//`
			`// At this point, currBitsLeft might be negative, just because`
			`// we're subtracting whole bytes. To keep anyone from freaking`
			`// out, zero the counter.`
			`//`

			`if (currBitsLeft < 0)`
			`currBitsLeft = 0;`
			`}`

			`buffer \|= bufferBack >> (64 - numBits);`
			`}`

			`bufferBack = bufferBack << numBits;`
			`bufferBackNumBits -= numBits;`

			`//`
			`// We can have cases where the previous shift of bufferBack is << 64 -`
			`// in which case no shift occurs. The bit count math still works though,`
			`// so if we don't have any bits left, zero out bufferBack.`
			`//`

			`if (bufferBackNumBits == 0)`
			`bufferBack = 0;`
			`}`

			`//`
			`// Read the next few bits out of a bitstream. Will be given a backing buffer`
			`// (buffer) that may still have data left over from previous reads`
			`// (bufferNumBits). Bitstream pointer (currByte) will be advanced when needed.`
			`//`

			`inline Int64`
			`FastHufDecoder::readBits`
			`(int numBits,`
			`Int64 &buffer, // c`
			`int &bufferNumBits, // lc`
			`const char *&currByte) // in`
			`{`
			`while (bufferNumBits < numBits)`
			`{`
			`buffer = (buffer << 8) \| (unsigned char)(currByte++);`
			`bufferNumBits += 8;`
			`}`

			`bufferNumBits -= numBits;`
			`return (buffer >> bufferNumBits) & ((1 << numBits) - 1);`
			`}`


			`//`
			`// Decode using a the 'One-Shift' strategy for decoding, with a`
			`// small-ish table to accelerate decoding of short codes.`
			`//`
			`// If possible, try looking up codes into the acceleration table.`
			`// This has a few benifits - there's no search involved; We don't`
			`// need an additional lookup to map id to symbol; we don't need`
			`// a full 64-bits (so less refilling).`
			`//`

			`void`
			`FastHufDecoder::decode`
			`(const unsigned char *src,`
			`int numSrcBits,`
			`unsigned short *dst,`
			`int numDstElems)`
			`{`
			`if (numSrcBits < 128)`
			`throw IEX_NAMESPACE::InputExc ("Error choosing Huffman decoder implementation "`
			`"(insufficient number of bits).");`

			`//`
			`// Current position (byte/bit) in the src data stream`
			`// (after the first buffer fill)`
			`//`

			`const unsigned char currByte = src + 2 sizeof (Int64);`

			`numSrcBits -= 8 * 2 * sizeof (Int64);`

			`//`
			`// 64-bit buffer holding the current bits in the stream`
			`//`

			`Int64 buffer = READ64 (src);`
			`int bufferNumBits = 64;`

			`//`
			`// 64-bit buffer holding the next bits in the stream`
			`//`

			`Int64 bufferBack = READ64 ((src + sizeof (Int64)));`
			`int bufferBackNumBits = 64;`

			`int dstIdx = 0;`

			`while (dstIdx < numDstElems)`
			`{`
			`int codeLen;`
			`int symbol;`

			`//`
			`// Test if we can be table accelerated. If so, directly`
			`// lookup the output symbol. Otherwise, we need to fall`
			`// back to searching for the code.`
			`//`
			`// If we're doing table lookups, we don't really need`
			`// a re-filled buffer, so long as we have TABLE_LOOKUP_BITS`
			`// left. But for a search, we do need a refilled table.`
			`//`

			`if (_tableMin <= buffer)`
			`{`
			`int tableIdx = buffer >> (64 - TABLE_LOOKUP_BITS);`

			`//`
			`// For invalid codes, _tableCodeLen[] should return 0. This`
			`// will cause the decoder to get stuck in the current spot`
			`// until we run out of elements, then barf that the codestream`
			`// is bad. So we don't need to stick a condition like`
			`// if (codeLen > _maxCodeLength) in this inner.`
			`//`

			`codeLen = _tableCodeLen[tableIdx];`
			`symbol = _tableSymbol[tableIdx];`
			`}`
			`else`
			`{`
			`if (bufferNumBits < 64)`
			`{`
			`refill (buffer,`
			`64 - bufferNumBits,`
			`bufferBack,`
			`bufferBackNumBits,`
			`currByte,`
			`numSrcBits);`

			`bufferNumBits = 64;`
			`}`

			`//`
			`// Brute force search:`
			`// Find the smallest length where _ljBase[length] <= buffer`
			`//`

			`codeLen = TABLE_LOOKUP_BITS + 1;`

			`while (_ljBase[codeLen] > buffer && codeLen <= _maxCodeLength)`
			`codeLen++;`

			`if (codeLen > _maxCodeLength)`
			`{`
			`throw IEX_NAMESPACE::InputExc ("Huffman decode error "`
			`"(Decoded an invalid symbol).");`
			`}`

			`Int64 id = _ljOffset[codeLen] + (buffer >> (64 - codeLen));`
			`if (id < _numSymbols)`
			`{`
			`symbol = _idToSymbol[id];`
			`}`
			`else`
			`{`
			`throw IEX_NAMESPACE::InputExc ("Huffman decode error "`
			`"(Decoded an invalid symbol).");`
			`}`
			`}`

			`//`
			`// Shift over bit stream, and update the bit count in the buffer`
			`//`

			`buffer = buffer << codeLen;`
			`bufferNumBits -= codeLen;`

			`//`
			`// If we recieved a RLE symbol (_rleSymbol), then we need`
			`// to read ahead 8 bits to know how many times to repeat`
			`// the previous symbol. Need to ensure we at least have`
			`// 8 bits of data in the buffer`
			`//`

			`if (symbol == _rleSymbol)`
			`{`
			`if (bufferNumBits < 8)`
			`{`
			`refill (buffer,`
			`64 - bufferNumBits,`
			`bufferBack,`
			`bufferBackNumBits,`
			`currByte,`
			`numSrcBits);`

			`bufferNumBits = 64;`
			`}`

			`int rleCount = buffer >> 56;`

			`if (dstIdx < 1)`
			`{`
			`throw IEX_NAMESPACE::InputExc ("Huffman decode error (RLE code "`
			`"with no previous symbol).");`
			`}`

			`if (dstIdx + rleCount > numDstElems)`
			`{`
			`throw IEX_NAMESPACE::InputExc ("Huffman decode error (Symbol run "`
			`"beyond expected output buffer length).");`
			`}`

			`if (rleCount <= 0)`
			`{`
			`throw IEX_NAMESPACE::InputExc("Huffman decode error"`
			`" (Invalid RLE length)");`
			`}`

			`for (int i = 0; i < rleCount; ++i)`
			`dst[dstIdx + i] = dst[dstIdx - 1];`

			`dstIdx += rleCount;`

			`buffer = buffer << 8;`
			`bufferNumBits -= 8;`
			`}`
			`else`
			`{`
			`dst[dstIdx] = symbol;`
			`dstIdx++;`
			`}`

			`//`
			`// refill bit stream buffer if we're below the number of`
			`// bits needed for a table lookup`
			`//`

			`if (bufferNumBits < TABLE_LOOKUP_BITS)`
			`{`
			`refill (buffer,`
			`64 - bufferNumBits,`
			`bufferBack,`
			`bufferBackNumBits,`
			`currByte,`
			`numSrcBits);`

			`bufferNumBits = 64;`
			`}`
			`}`

			`if (numSrcBits != 0)`
			`{`
			`throw IEX_NAMESPACE::InputExc ("Huffman decode error (Compressed data remains "`
			`"after filling expected output buffer).");`
			`}`
			`}`

			`OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT`