diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 14a5671cebd5d82bea22b83fc756ff5de6153f75..6e853ecfc327faea9aaa70b5afd30c217019b08e 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -291,6 +291,12 @@ static const int AFFINE_MAX_NUM_V2 = 2; ///< max static const int AFFINE_MAX_NUM_COMB = 12; ///< max number of combined motion candidates static const int AFFINE_MIN_BLOCK_SIZE = 4; ///< Minimum affine MC block size +#if JVET_L0274 +static const int MAX_NUM_REG_BINS_4x4SUBBLOCK = 32; ///< max number of context-coded bins (incl. gt2 bins) per 4x4 subblock +static const int MAX_NUM_GT2_BINS_4x4SUBBLOCK = 4; ///< max number of gt2 bins per 4x4 subblock +static const int MAX_NUM_REG_BINS_2x2SUBBLOCK = 8; ///< max number of context-coded bins (incl. gt2 bins) per 2x2 subblock (chroma) +static const int MAX_NUM_GT2_BINS_2x2SUBBLOCK = 2; ///< max number of gt2 bins per 2x2 subblock (chroma) +#endif #if JVET_L0646_GBI static const int GBI_NUM = 5; ///< the number of weight options static const int GBI_DEFAULT = ((uint8_t)(GBI_NUM >> 1)); ///< Default weighting index representing for w=0.5 diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h index 1a353fd6579319f32648e630c59674f13da1527e..58c8d8e5898b9222b1fa7529b6654b597c06ffed 100644 --- a/source/Lib/CommonLib/ContextModelling.h +++ b/source/Lib/CommonLib/ContextModelling.h @@ -107,7 +107,11 @@ public: const int diag = posX + posY; int numPos = 0; int sumAbs = 0; +#if JVET_L0274 +#define UPDATE(x) {int a=abs(x);sumAbs+=std::min(2+(a&1),a);numPos+=!!a;} /* JVET_L0274 variant: a neighbour of magnitude a adds min(2+(a&1), a) to sumAbs, i.e. a itself for a < 2, then 2 for even and 3 for odd a >= 2; numPos counts the non-zero neighbours. The variant kept under #else caps the contribution at 4-(a&1) instead. */ +#else #define UPDATE(x) {int a=abs(x);sumAbs+=std::min(4-(a&1),a);numPos+=!!a;} +#endif if( posX < m_width-1 ) { UPDATE( pData[1] ); @@ -154,6 +158,36 @@ public: unsigned greater1CtxIdAbs ( uint8_t offset ) const { return m_gtxFlagCtxSet[1]( offset ); } unsigned greater2CtxIdAbs ( uint8_t offset ) const { return m_gtxFlagCtxSet[0]( offset ); } +#if JVET_L0274 + 
unsigned templateAbsSum( int scanPos, const TCoeff* coeff ) /* Sums the absolute values of the template neighbours of scanPos that lie inside the m_width x m_height block (right, two to the right, below-right, below, two below) and returns that sum clipped to 31. */ + { + const uint32_t posY = m_scanPosY[scanPos]; + const uint32_t posX = m_scanPosX[scanPos]; + const TCoeff* pData = coeff + posX + posY * m_width; /* points at the coefficient of scanPos in raster order */ + int sum = 0; + if (posX < m_width - 1) + { + sum += abs(pData[1]); + if (posX < m_width - 2) + { + sum += abs(pData[2]); + } + if (posY < m_height - 1) + { + sum += abs(pData[m_width + 1]); + } + } + if (posY < m_height - 1) + { + sum += abs(pData[m_width]); + if (posY < m_height - 2) + { + sum += abs(pData[m_width << 1]); + } + } + return std::min(sum, 31); + } +#else unsigned GoRiceParAbs( int scanPos, const TCoeff* coeff ) const { #define UPDATE(x) sum+=abs(x)-!!x @@ -185,7 +219,7 @@ public: int r = g_auiGoRicePars[ std::min( sum, 31 ) ]; return r; } - +#endif unsigned emtNumSigCoeff() const { return m_emtNumSigCoeff; } void setEmtNumSigCoeff( unsigned val ) { m_emtNumSigCoeff = val; } diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp index 6025bfcd1caf4a06c67c542808db4563ec776a22..63576e159053b7ae8bf35f55b6658c5a6141409f 100644 --- a/source/Lib/CommonLib/Contexts.cpp +++ b/source/Lib/CommonLib/Contexts.cpp @@ -456,6 +456,44 @@ const CtxSet ContextSetCfg::SigCoeffGroup[] = const CtxSet ContextSetCfg::SigFlag[] = { +#if JVET_L0274 /* JVET_L0274 tables: six context sets of three rows each; the sets alternate between 18 and 12 entries per row. RateEstimator::xSetSigFlagBits in DepQuant.cpp reads set (chType + 2*ctxSetId) with 18 contexts for luma and 12 for chroma. The meaning of the three rows is defined by ContextSetCfg::addCtxSet, which is not part of this patch. */ + ContextSetCfg::addCtxSet + ({ + { 120, 152, 167, 153, 168, 169, 119, 167, 197, 183, 183, 170, 209, 213, 183, 183, 169, 185, }, + { 149, 152, 167, 168, 183, 140, 149, 182, 168, 183, 169, 170, 195, 213, 183, 198, 184, 156, }, + { 120, 138, 153, 154, 140, 126, 120, 139, 154, 155, 155, 142, 137, 185, 169, 185, 171, 159, }, + }), + ContextSetCfg::addCtxSet + ({ + { 148, 167, 153, 139, 154, 140, 166, 199, 183, 184, 184, 157, }, + { 134, 168, 168, 139, 169, 155, 166, 229, 198, 229, 185, 157, }, + { 119, 168, 153, 140, 140, 141, 167, 200, 155, 172, 142, 158, }, + }), + ContextSetCfg::addCtxSet + ({ + { 152, 127, 173, 201, 187, 173, 226, 188, 188, 217, 188, 174, 182, 223, 223, 223, 223, 223, }, + { 123, 
142, 202, 172, 157, 203, 138, 173, 218, 188, 173, 175, 168, 223, 223, 223, 223, 223, }, + { 108, 157, 173, 173, 218, 189, 123, 175, 159, 175, 190, 251, 79, 223, 223, 223, 223, 223, }, + }), + ContextSetCfg::addCtxSet + ({ + { 196, 156, 143, 158, 172, 216, 168, 223, 223, 223, 191, 223, }, + { 182, 141, 158, 186, 142, 173, 183, 223, 223, 223, 222, 223, }, + { 152, 158, 157, 187, 204, 175, 170, 223, 223, 237, 223, 223, }, + }), + ContextSetCfg::addCtxSet + ({ + { 137, 142, 189, 173, 187, 174, 241, 175, 175, 174, 174, 204, 210, 223, 223, 223, 223, 223, }, + { 123, 172, 175, 158, 158, 233, 138, 175, 190, 175, 188, 175, 196, 223, 223, 223, 223, 223, }, + { 107, 143, 219, 188, 233, 190, 63, 250, 205, 252, 220, 251, 63, 223, 223, 223, 223, 253, }, + }), + ContextSetCfg::addCtxSet + ({ + { 167, 185, 159, 158, 159, 189, 196, 223, 223, 223, 223, 223, }, + { 167, 141, 175, 143, 172, 159, 182, 223, 223, 223, 223, 223, }, + { 166, 159, 158, 232, 158, 174, 183, 238, 223, 223, 223, 223, }, + }), +#else ContextSetCfg::addCtxSet ({ { 106, 167, 182, 124, 139, 169, 134, 167, 197, 183, 183, 184, 209, 198, 168, 168, 183, 170, CNU, CNU, }, @@ -492,9 +530,55 @@ const CtxSet ContextSetCfg::SigFlag[] = { 167, 155, 159, 157, 157, 158, 182, 223, 223, 223, 223, 223, }, { 181, 159, 143, 232, 143, 173, 169, 237, 223, 223, 238, 253, }, }), +#endif }; +#if JVET_L0274 +const CtxSet ContextSetCfg::ParFlag[] = /* two context sets: 21 entries per row, then 11 entries per row; RateEstimator::xSetGtxFlagBits selects the set by channel type and reads 21 contexts for luma, 11 for chroma */ +{ + ContextSetCfg::addCtxSet + ({ + { 105, 119, 151, 152, 153, 153, 135, 152, 182, 153, 168, 136, 182, 153, 168, 139, 166, 168, 139, 168, 154, }, + { 120, 119, 151, 167, 138, 168, 135, 152, 153, 153, 139, 136, 153, 153, 168, 139, 137, 168, 168, 139, 139, }, + { 135, 150, 152, 138, 153, 153, 151, 123, 153, 168, 139, 152, 153, 153, 139, 139, 138, 168, 139, 154, 139, }, + }), + ContextSetCfg::addCtxSet + ({ + { 105, 135, 152, 167, 153, 124, 151, 168, 169, 153, 124, }, + { 134, 150, 152, 153, 153, 153, 166, 168, 168, 139, 139, }, + { 135, 121, 167, 168, 138, 153, 167, 139, 154, 139, 154, }, + }), 
+}; + +const CtxSet ContextSetCfg::GtxFlag[] = /* four context sets (21, 11, 21, 11 entries per row); RateEstimator::xSetGtxFlagBits uses GtxFlag[chType] as the gt2 set and GtxFlag[2 + chType] as the gt1 set */ +{ + ContextSetCfg::addCtxSet + ({ + { 73, 0, 58, 119, 150, 137, 42, 73, 120, 136, 123, 58, 149, 151, 152, 153, 134, 136, 152, 153, 125, }, + { 88, 0, 102, 104, 150, 122, 101, 89, 150, 151, 138, 88, 120, 122, 152, 153, 105, 107, 123, 153, 154, }, + { 134, 161, 149, 121, 122, 138, 88, 120, 107, 108, 109, 105, 107, 123, 109, 124, 151, 138, 139, 154, 140, }, + }), + ContextSetCfg::addCtxSet + ({ + { 87, 57, 90, 107, 107, 63, 119, 91, 152, 124, 140, }, + { 101, 0, 105, 121, 107, 93, 118, 106, 108, 124, 154, }, + { 179, 72, 90, 121, 122, 123, 75, 76, 123, 139, 170, }, + }), + ContextSetCfg::addCtxSet + ({ + { 89, 103, 121, 137, 138, 139, 119, 137, 138, 139, 125, 135, 167, 168, 154, 140, 136, 153, 183, 155, 185, }, + { 118, 0, 136, 152, 153, 154, 134, 152, 153, 139, 140, 150, 138, 139, 154, 155, 151, 153, 169, 140, 200, }, + { 164, 149, 137, 153, 124, 125, 151, 138, 139, 125, 125, 152, 139, 140, 140, 111, 153, 154, 155, 170, 127, }, + }), + ContextSetCfg::addCtxSet + ({ + { 27, 149, 137, 153, 139, 125, 151, 154, 170, 127, 127, }, + { 132, 135, 152, 139, 139, 125, 151, 154, 155, 141, 142, }, + { 165, 121, 138, 139, 139, 125, 138, 154, 156, 171, 127, }, + }), +}; +#else const CtxSet ContextSetCfg::ParFlag[] = { ContextSetCfg::addCtxSet @@ -538,6 +622,7 @@ const CtxSet ContextSetCfg::GtxFlag[] = { 147, 73, 164, 151, 107, 109, 120, 152, 140, 185, 111, }, }), }; +#endif const CtxSet ContextSetCfg::LastX[] = { diff --git a/source/Lib/CommonLib/DepQuant.cpp b/source/Lib/CommonLib/DepQuant.cpp index 8e146f018920078b34b35716d9850cfa5b417a6d..2e725de3936f71a437be0530a6836a778e2bb46e 100644 --- a/source/Lib/CommonLib/DepQuant.cpp +++ b/source/Lib/CommonLib/DepQuant.cpp @@ -64,9 +64,502 @@ namespace DQIntern }; struct CoeffFracBits { +#if JVET_L0274 + int32_t bits[6]; /* six cost entries, bits[0..5], filled by RateEstimator::xSetGtxFlagBits; the non-L0274 layout below has seven */ +#else int32_t bits[7]; +#endif + }; + + +#if JVET_L0274_ENCODER_SPEED_UP + enum ScanPosType { SCAN_ISCSBB = 0, SCAN_SOCSBB = 1, SCAN_EOCSBB = 2 }; /* classification of a scan position, assigned in TUParameters::xSetScanInfo; SCAN_SOCSBB and SCAN_EOCSBB take the place of the former socsbb / eocsbb booleans, SCAN_ISCSBB is the default */ + + struct ScanInfo /* Per-scan-position record precomputed by TUParameters::xSetScanInfo. The default constructor initializes nothing: sigCtxOffsetNext, gtxCtxOffsetNext, nextInsidePos and nextNbInfoSbb are written only for scanIdx > 0, and nextSbbRight / nextSbbBelow only when eosbb is also true. */ + { 
+ ScanInfo() {} + int sbbSize; + int numSbb; + int scanIdx; + int rasterPos; + int sbbPos; + int insidePos; + bool eosbb; + ScanPosType spt; + unsigned sigCtxOffsetNext; + unsigned gtxCtxOffsetNext; + int nextInsidePos; + NbInfoSbb nextNbInfoSbb; + int nextSbbRight; + int nextSbbBelow; }; + class Rom; + struct TUParameters /* Precomputed sub-block geometry and scan tables for one (width, height, channel type) combination. The const table pointers refer to g_scanOrder / g_scanOrderPosXY and to tables returned by Rom; only m_scanInfo is owned. */ + { + TUParameters ( const Rom& rom, const unsigned width, const unsigned height, const ChannelType chType ); + ~TUParameters() /* releases the ScanInfo array allocated with new[] in the constructor. NOTE(review): copy construction and copy assignment are not disabled, so copying an instance would delete m_scanInfo twice. */ + { + delete [] m_scanInfo; + } + + ChannelType m_chType; + unsigned m_width; + unsigned m_height; + unsigned m_numCoeff; + unsigned m_numSbb; + unsigned m_log2SbbWidth; + unsigned m_log2SbbHeight; + unsigned m_log2SbbSize; + unsigned m_sbbSize; + unsigned m_sbbMask; + unsigned m_widthInSbb; + unsigned m_heightInSbb; + CoeffScanType m_scanType; + const unsigned* m_scanSbbId2SbbPos; + const unsigned* m_scanId2BlkPos; + const unsigned* m_scanId2PosX; + const unsigned* m_scanId2PosY; + const NbInfoSbb* m_scanId2NbInfoSbb; + const NbInfoOut* m_scanId2NbInfoOut; + ScanInfo* m_scanInfo; + private: + void xSetScanInfo( ScanInfo& scanInfo, int scanIdx ); + }; + + class Rom /* Owns the neighbour tables and the TUParameters objects for every block size. The arrays are indexed by log2 width and log2 height (plus channel type for m_tuParameters); xInitScanArrays fills indices 1..MAX_CU_DEPTH, entries with index 0 stay null. */ + { + public: + Rom() : m_scansInitialized(false) {} + ~Rom() { xUninitScanArrays(); } + void init () { xInitScanArrays(); } + const NbInfoSbb* getNbInfoSbb( int hd, int vd ) const { return m_scanId2NbInfoSbbArray[hd][vd]; } + const NbInfoOut* getNbInfoOut( int hd, int vd ) const { return m_scanId2NbInfoOutArray[hd][vd]; } + const TUParameters* getTUPars ( const CompArea& area, const ComponentID compID ) const /* looks up the precomputed parameters via g_aucLog2 of the area width and height and the channel type of compID */ + { + return m_tuParameters[g_aucLog2[area.width]][g_aucLog2[area.height]][toChannelType(compID)]; + } + private: + void xInitScanArrays (); + void xUninitScanArrays (); + private: + bool m_scansInitialized; + NbInfoSbb* m_scanId2NbInfoSbbArray[ MAX_CU_DEPTH+1 ][ MAX_CU_DEPTH+1 ]; + NbInfoOut* m_scanId2NbInfoOutArray[ MAX_CU_DEPTH+1 ][ MAX_CU_DEPTH+1 ]; + TUParameters* m_tuParameters [ MAX_CU_DEPTH+1 ][ MAX_CU_DEPTH+1 ][ MAX_NUM_CHANNEL_TYPE ]; + }; + + 
void Rom::xInitScanArrays() /* Builds, once, the neighbour tables (NbInfoSbb / NbInfoOut) and one TUParameters object per channel type for every block size 2^hd x 2^vd with hd, vd in 1..MAX_CU_DEPTH. Returns immediately when the tables are already initialized. */ + { + if( m_scansInitialized ) + { + return; + } + ::memset( m_scanId2NbInfoSbbArray, 0, sizeof(m_scanId2NbInfoSbbArray) ); + ::memset( m_scanId2NbInfoOutArray, 0, sizeof(m_scanId2NbInfoOutArray) ); + ::memset( m_tuParameters, 0, sizeof(m_tuParameters) ); + + uint32_t raster2id[ MAX_CU_SIZE * MAX_CU_SIZE ]; /* scratch map from raster position to scan index, refilled for each block size */ + + for( int hd = 1; hd <= MAX_CU_DEPTH; hd++ ) + { + for( int vd = 1; vd <= MAX_CU_DEPTH; vd++ ) + { + const uint32_t blockWidth = (1 << hd); + const uint32_t blockHeight = (1 << vd); + const uint32_t totalValues = blockWidth * blockHeight; + const uint32_t log2CGWidth = (blockWidth & 3) + (blockHeight & 3) > 0 ? 1 : 2; + const uint32_t log2CGHeight = (blockWidth & 3) + (blockHeight & 3) > 0 ? 1 : 2; /* 2x2 coefficient groups when width or height is not a multiple of 4, otherwise 4x4 */ + const uint32_t groupWidth = 1 << log2CGWidth; + const uint32_t groupHeight = 1 << log2CGHeight; + const uint32_t groupSize = groupWidth * groupHeight; + const CoeffScanType scanType = SCAN_DIAG; + const SizeType blkWidthIdx = gp_sizeIdxInfo->idxFrom( blockWidth ); + const SizeType blkHeightIdx = gp_sizeIdxInfo->idxFrom( blockHeight ); + const uint32_t* scanId2RP = g_scanOrder [SCAN_GROUPED_4x4][scanType][blkWidthIdx][blkHeightIdx]; + const uint32_t* scanId2X = g_scanOrderPosXY[SCAN_GROUPED_4x4][scanType][blkWidthIdx][blkHeightIdx][0]; + const uint32_t* scanId2Y = g_scanOrderPosXY[SCAN_GROUPED_4x4][scanType][blkWidthIdx][blkHeightIdx][1]; + NbInfoSbb*& sId2NbSbb = m_scanId2NbInfoSbbArray[hd][vd]; + NbInfoOut*& sId2NbOut = m_scanId2NbInfoOutArray[hd][vd]; + + sId2NbSbb = new NbInfoSbb[ totalValues ]; + sId2NbOut = new NbInfoOut[ totalValues ]; + + for( uint32_t scanId = 0; scanId < totalValues; scanId++ ) + { + raster2id[ scanId2RP[ scanId ] ] = scanId; + } + + for( unsigned scanId = 0; scanId < totalValues; scanId++ ) + { + const int posX = scanId2X [ scanId ]; + const int posY = scanId2Y [ scanId ]; + const int rpos = scanId2RP[ scanId ]; + { + //===== inside subband neighbours ===== + NbInfoSbb& nbSbb = sId2NbSbb[ scanId ]; + const 
int begSbb = scanId - ( scanId & (groupSize-1) ); // first pos in current subblock + int cpos[5]; /* candidates in the order right, two right, below-right, below, two below. Each entry is the neighbour's scan offset from begSbb, or 0 when the neighbour is outside the block or outside the current sub-block. raster2id[] is uint32_t, so the subtraction is unsigned: a neighbour whose scan index is below begSbb wraps to a large value and fails the "< groupSize" test. */ + cpos[0] = ( posX < blockWidth -1 ? ( raster2id[rpos+1 ] - begSbb < groupSize ? raster2id[rpos+1 ] - begSbb : 0 ) : 0 ); + cpos[1] = ( posX < blockWidth -2 ? ( raster2id[rpos+2 ] - begSbb < groupSize ? raster2id[rpos+2 ] - begSbb : 0 ) : 0 ); + cpos[2] = ( posX < blockWidth -1 && posY < blockHeight-1 ? ( raster2id[rpos+1+blockWidth] - begSbb < groupSize ? raster2id[rpos+1+blockWidth] - begSbb : 0 ) : 0 ); + cpos[3] = ( posY < blockHeight-1 ? ( raster2id[rpos+ blockWidth] - begSbb < groupSize ? raster2id[rpos+ blockWidth] - begSbb : 0 ) : 0 ); + cpos[4] = ( posY < blockHeight-2 ? ( raster2id[rpos+2*blockWidth] - begSbb < groupSize ? raster2id[rpos+2*blockWidth] - begSbb : 0 ) : 0 ); + for( nbSbb.num = 0; true; ) /* repeatedly extracts the smallest non-zero candidate, so inPos[] ends up in ascending order */ + { + int nk = -1; + for( int k = 0; k < 5; k++ ) + { + if( cpos[k] != 0 && ( nk < 0 || cpos[k] < cpos[nk] ) ) + { + nk = k; + } + } + if( nk < 0 ) + { + break; + } + nbSbb.inPos[ nbSbb.num++ ] = uint8_t( cpos[nk] ); + cpos[nk] = 0; + } + for( int k = nbSbb.num; k < 5; k++ ) /* zero-pad the unused slots */ + { + nbSbb.inPos[k] = 0; + } + } + { + //===== outside subband neighbours ===== + NbInfoOut& nbOut = sId2NbOut[ scanId ]; + const int begSbb = scanId - ( scanId & (groupSize-1) ); // first pos in current subblock + int cpos[5]; /* same five neighbours, but keeping only those outside the current sub-block; at this stage the entries are absolute scan indices (0 when there is no such neighbour) */ + cpos[0] = ( posX < blockWidth -1 ? ( raster2id[rpos+1 ] - begSbb >= groupSize ? raster2id[rpos+1 ] : 0 ) : 0 ); + cpos[1] = ( posX < blockWidth -2 ? ( raster2id[rpos+2 ] - begSbb >= groupSize ? raster2id[rpos+2 ] : 0 ) : 0 ); + cpos[2] = ( posX < blockWidth -1 && posY < blockHeight-1 ? ( raster2id[rpos+1+blockWidth] - begSbb >= groupSize ? raster2id[rpos+1+blockWidth] : 0 ) : 0 ); + cpos[3] = ( posY < blockHeight-1 ? ( raster2id[rpos+ blockWidth] - begSbb >= groupSize ? raster2id[rpos+ blockWidth] : 0 ) : 0 ); + cpos[4] = ( posY < blockHeight-2 ? ( raster2id[rpos+2*blockWidth] - begSbb >= groupSize ? 
raster2id[rpos+2*blockWidth] : 0 ) : 0 ); + for( nbOut.num = 0; true; ) /* same ascending extraction as for the inside neighbours */ + { + int nk = -1; + for( int k = 0; k < 5; k++ ) + { + if( cpos[k] != 0 && ( nk < 0 || cpos[k] < cpos[nk] ) ) + { + nk = k; + } + } + if( nk < 0 ) + { + break; + } + nbOut.outPos[ nbOut.num++ ] = uint16_t( cpos[nk] ); + cpos[nk] = 0; + } + for( int k = nbOut.num; k < 5; k++ ) + { + nbOut.outPos[k] = 0; + } + nbOut.maxDist = ( scanId == 0 ? 0 : sId2NbOut[scanId-1].maxDist ); /* running maximum of all outPos values seen up to and including this scanId */ + for( int k = 0; k < nbOut.num; k++ ) + { + if( nbOut.outPos[k] > nbOut.maxDist ) + { + nbOut.maxDist = nbOut.outPos[k]; + } + } + } + } + + // make it relative + for( unsigned scanId = 0; scanId < totalValues; scanId++ ) /* second pass: outPos becomes relative to the first position of the sub-block, maxDist relative to scanId */ + { + NbInfoOut& nbOut = sId2NbOut[scanId]; + const int begSbb = scanId - ( scanId & (groupSize-1) ); // first pos in current subblock + for( int k = 0; k < nbOut.num; k++ ) + { + nbOut.outPos[k] -= begSbb; + } + nbOut.maxDist -= scanId; + } + + for( int chId = 0; chId < MAX_NUM_CHANNEL_TYPE; chId++ ) /* the TUParameters constructor reads the neighbour tables back through *this, so they must be complete at this point */ + { + m_tuParameters[hd][vd][chId] = new TUParameters( *this, blockWidth, blockHeight, ChannelType(chId) ); + } + } + } + m_scansInitialized = true; + } + + void Rom::xUninitScanArrays() /* Frees everything allocated by xInitScanArrays; does nothing when the tables are not initialized. The freed pointers are not reset here, xInitScanArrays clears the arrays with memset before reuse. */ + { + if( !m_scansInitialized ) + { + return; + } + for( int hd = 0; hd <= MAX_CU_DEPTH; hd++ ) + { + for( int vd = 0; vd <= MAX_CU_DEPTH; vd++ ) + { + NbInfoSbb*& sId2NbSbb = m_scanId2NbInfoSbbArray[hd][vd]; + NbInfoOut*& sId2NbOut = m_scanId2NbInfoOutArray[hd][vd]; + if( sId2NbSbb ) + { + delete [] sId2NbSbb; + } + if( sId2NbOut ) + { + delete [] sId2NbOut; + } + for( int chId = 0; chId < MAX_NUM_CHANNEL_TYPE; chId++ ) + { + TUParameters*& tuPars = m_tuParameters[hd][vd][chId]; + if( tuPars ) + { + delete tuPars; + } + } + } + } + m_scansInitialized = false; + } + + + static Rom g_Rom; /* table instance with internal linkage, local to this source file */ + + + TUParameters::TUParameters( const Rom& rom, const unsigned width, const unsigned height, const ChannelType chType ) /* Derives the sub-block geometry for a width x height block, binds the scan and neighbour tables, and precomputes one ScanInfo record per scan index. */ + { + m_chType = chType; + m_width = width; + m_height = height; + m_numCoeff = m_width * m_height; + const bool no4x4 
= ( ( m_width & 3 ) != 0 || ( m_height & 3 ) != 0 ); + m_log2SbbWidth = ( no4x4 ? 1 : 2 ); + m_log2SbbHeight = ( no4x4 ? 1 : 2 ); /* 2x2 sub-blocks when either dimension is not a multiple of 4, otherwise 4x4 */ + m_log2SbbSize = m_log2SbbWidth + m_log2SbbHeight; + m_sbbSize = ( 1 << m_log2SbbSize ); + m_sbbMask = m_sbbSize - 1; + m_widthInSbb = m_width >> m_log2SbbWidth; + m_heightInSbb = m_height >> m_log2SbbHeight; + m_numSbb = m_widthInSbb * m_heightInSbb; +#if HEVC_USE_MDCS +#error "MDCS is not supported" // use different function... + // m_scanType = CoeffScanType( TU::getCoefScanIdx( tu, m_compID ) ); +#else + m_scanType = SCAN_DIAG; +#endif + SizeType hsbb = gp_sizeIdxInfo->idxFrom( m_widthInSbb ); + SizeType vsbb = gp_sizeIdxInfo->idxFrom( m_heightInSbb ); + SizeType hsId = gp_sizeIdxInfo->idxFrom( m_width ); + SizeType vsId = gp_sizeIdxInfo->idxFrom( m_height ); + m_scanSbbId2SbbPos = g_scanOrder [ SCAN_UNGROUPED ][ m_scanType ][ hsbb ][ vsbb ]; + m_scanId2BlkPos = g_scanOrder [ SCAN_GROUPED_4x4 ][ m_scanType ][ hsId ][ vsId ]; + m_scanId2PosX = g_scanOrderPosXY[ SCAN_GROUPED_4x4 ][ m_scanType ][ hsId ][ vsId ][ 0 ]; + m_scanId2PosY = g_scanOrderPosXY[ SCAN_GROUPED_4x4 ][ m_scanType ][ hsId ][ vsId ][ 1 ]; + int log2W = g_aucLog2[ m_width ]; + int log2H = g_aucLog2[ m_height ]; + m_scanId2NbInfoSbb = rom.getNbInfoSbb( log2W, log2H ); + m_scanId2NbInfoOut = rom.getNbInfoOut( log2W, log2H ); + m_scanInfo = new ScanInfo[ m_numCoeff ]; /* owned array, released in ~TUParameters */ + for( int scanIdx = 0; scanIdx < m_numCoeff; scanIdx++ ) + { + xSetScanInfo( m_scanInfo[scanIdx], scanIdx ); + } + } + + + void TUParameters::xSetScanInfo( ScanInfo& scanInfo, int scanIdx ) /* Fills the ScanInfo record for scanIdx. In this function "next" always refers to scan index scanIdx - 1. */ + { + scanInfo.sbbSize = m_sbbSize; + scanInfo.numSbb = m_numSbb; + scanInfo.scanIdx = scanIdx; + scanInfo.rasterPos = m_scanId2BlkPos[ scanIdx ]; + scanInfo.sbbPos = m_scanSbbId2SbbPos[ scanIdx >> m_log2SbbSize ]; + scanInfo.insidePos = scanIdx & m_sbbMask; + scanInfo.eosbb = ( scanInfo.insidePos == 0 ); /* true for the lowest scan index of a sub-block */ + scanInfo.spt = SCAN_ISCSBB; /* spt becomes SCAN_SOCSBB for the highest index of a sub-block (insidePos == m_sbbMask) when sbbSize < scanIdx < m_numCoeff - 1, SCAN_EOCSBB for the lowest index of a sub-block when 0 < scanIdx < m_numCoeff - m_sbbSize, and stays SCAN_ISCSBB otherwise */ + if( scanInfo.insidePos == m_sbbMask && scanIdx > scanInfo.sbbSize && scanIdx < 
m_numCoeff - 1 ) + scanInfo.spt = SCAN_SOCSBB; + else if( scanInfo.eosbb && scanIdx > 0 && scanIdx < m_numCoeff - m_sbbSize ) + scanInfo.spt = SCAN_EOCSBB; + if( scanIdx ) /* scan index 0 has no next position; its next-related fields are left unset */ + { + const int nextScanIdx = scanIdx - 1; + const int diag = m_scanId2PosX[ nextScanIdx ] + m_scanId2PosY[ nextScanIdx ]; /* context offsets below depend only on this diagonal index (x plus y) of the next position and on the channel type */ + if( m_chType == CHANNEL_TYPE_LUMA ) + { + scanInfo.sigCtxOffsetNext = ( diag < 2 ? 12 : diag < 5 ? 6 : 0 ); + scanInfo.gtxCtxOffsetNext = ( diag < 1 ? 16 : diag < 3 ? 11 : diag < 10 ? 6 : 1 ); + } + else + { + scanInfo.sigCtxOffsetNext = ( diag < 2 ? 6 : 0 ); + scanInfo.gtxCtxOffsetNext = ( diag < 1 ? 6 : 1 ); + } + scanInfo.nextInsidePos = nextScanIdx & m_sbbMask; + scanInfo.nextNbInfoSbb = m_scanId2NbInfoSbb[ nextScanIdx ]; + if( scanInfo.eosbb ) /* the next position lies in another sub-block: record that sub-block's right and below neighbours, 0 when there is none */ + { + const int nextSbbPos = m_scanSbbId2SbbPos[ nextScanIdx >> m_log2SbbSize ]; + const int nextSbbPosY = nextSbbPos / m_widthInSbb; + const int nextSbbPosX = nextSbbPos - nextSbbPosY * m_widthInSbb; + scanInfo.nextSbbRight = ( nextSbbPosX < m_widthInSbb - 1 ? nextSbbPos + 1 : 0 ); + scanInfo.nextSbbBelow = ( nextSbbPosY < m_heightInSbb - 1 ? 
nextSbbPos + m_widthInSbb : 0 ); + } + } + } + + + + class RateEstimator /* Caches fractional-bit cost tables for one transform unit; initCtx fills all of them, the accessors only read. */ + { + public: + RateEstimator () {} + ~RateEstimator() {} + void initCtx ( const TUParameters& tuPars, const TransformUnit& tu, const ComponentID compID, const FracBitsAccess& fracBitsAccess ); + + inline const BinFracBits *sigSbbFracBits() const { return m_sigSbbFracBits; } + inline const BinFracBits *sigFlagBits(unsigned stateId) const /* state ids 0 and 1 both map to context set 0; a state id s >= 1 maps to set s - 1 */ + { + return m_sigFracBits[std::max(((int) stateId) - 1, 0)]; + } + inline const CoeffFracBits *gtxFracBits(unsigned stateId) const { return m_gtxFracBits; } /* stateId is not used: one table serves all states */ + inline int32_t lastOffset(unsigned scanIdx) const /* sum of the X and Y last-position costs for the coordinates of scanIdx */ + { + return m_lastBitsX[m_scanId2PosX[scanIdx]] + m_lastBitsY[m_scanId2PosY[scanIdx]]; + } + + private: + void xSetLastCoeffOffset ( const FracBitsAccess& fracBitsAccess, const TUParameters& tuPars, const TransformUnit& tu, const ComponentID compID ); + void xSetSigSbbFracBits ( const FracBitsAccess& fracBitsAccess, ChannelType chType ); + void xSetSigFlagBits ( const FracBitsAccess& fracBitsAccess, ChannelType chType ); + void xSetGtxFlagBits ( const FracBitsAccess& fracBitsAccess, ChannelType chType ); + + private: + static const unsigned sm_numCtxSetsSig = 3; + static const unsigned sm_numCtxSetsGtx = 2; + static const unsigned sm_maxNumSigSbbCtx = 2; + static const unsigned sm_maxNumSigCtx = 18; + static const unsigned sm_maxNumGtxCtx = 21; + + private: + const unsigned* m_scanId2PosX; + const unsigned* m_scanId2PosY; + int32_t m_lastBitsX [ MAX_TU_SIZE ]; + int32_t m_lastBitsY [ MAX_TU_SIZE ]; + BinFracBits m_sigSbbFracBits [ sm_maxNumSigSbbCtx ]; + BinFracBits m_sigFracBits [ sm_numCtxSetsSig ][ sm_maxNumSigCtx ]; + CoeffFracBits m_gtxFracBits [ sm_maxNumGtxCtx ]; + }; + + void RateEstimator::initCtx( const TUParameters& tuPars, const TransformUnit& tu, const ComponentID compID, const FracBitsAccess& fracBitsAccess ) /* Binds the scan position tables of tuPars (not owned) and rebuilds every cost table for the given TU and component. */ + { + m_scanId2PosX = tuPars.m_scanId2PosX; + m_scanId2PosY = tuPars.m_scanId2PosY; + xSetSigSbbFracBits ( fracBitsAccess, 
tuPars.m_chType ); + xSetSigFlagBits ( fracBitsAccess, tuPars.m_chType ); + xSetGtxFlagBits ( fracBitsAccess, tuPars.m_chType ); + xSetLastCoeffOffset ( fracBitsAccess, tuPars, tu, compID ); + } + + void RateEstimator::xSetLastCoeffOffset( const FracBitsAccess& fracBitsAccess, const TUParameters& tuPars, const TransformUnit& tu, const ComponentID compID ) /* Fills m_lastBitsX and m_lastBitsY with the cost of each last-position coordinate. The difference intBits[1] - intBits[0] of the applicable CBF context is added to the Y table only. */ + { + const ChannelType chType = ( compID == COMPONENT_Y ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA ); + int32_t cbfDeltaBits = 0; + if( compID == COMPONENT_Y && !CU::isIntra(*tu.cu) && !tu.depth ) /* non-intra luma TU at depth 0 uses the root CBF context, everything else the per-component CBF context */ + { + const BinFracBits bits = fracBitsAccess.getFracBitsArray( Ctx::QtRootCbf() ); + cbfDeltaBits = int32_t( bits.intBits[1] ) - int32_t( bits.intBits[0] ); + } + else + { +#if ENABLE_BMS + BinFracBits bits = fracBitsAccess.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( compID, tu.depth, tu.cbf[COMPONENT_Cb] ) ) ); +#else + BinFracBits bits = fracBitsAccess.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( compID, tu.cbf[COMPONENT_Cb] ) ) ); +#endif + cbfDeltaBits = int32_t( bits.intBits[1] ) - int32_t( bits.intBits[0] ); + } + + static const unsigned prefixCtx[] = { 0, 0, 0, 3, 6, 10, 15, 21 }; + uint32_t ctxBits [ LAST_SIGNIFICANT_GROUPS ]; + for( unsigned xy = 0; xy < 2; xy++ ) /* xy == 0 handles the X coordinate (width), xy == 1 the Y coordinate (height) */ + { + int32_t bitOffset = ( xy ? cbfDeltaBits : 0 ); + int32_t* lastBits = ( xy ? m_lastBitsY : m_lastBitsX ); + const unsigned size = ( xy ? tuPars.m_height : tuPars.m_width ); + const unsigned log2Size = g_aucNextLog2[ size ]; +#if HEVC_USE_MDCS + const bool useYCtx = ( m_scanType == SCAN_VER ? ( xy == 0 ) : ( xy != 0 ) ); +#else + const bool useYCtx = ( xy != 0 ); +#endif + const CtxSet& ctxSetLast = ( useYCtx ? Ctx::LastY : Ctx::LastX )[ chType ]; + const unsigned lastShift = ( compID == COMPONENT_Y ? (log2Size+1)>>2 : ( tu.cs->pcv->rectCUs ? Clip3<unsigned>(0,2,size>>3) : log2Size-2 ) ); + const unsigned lastOffset = ( compID == COMPONENT_Y ? ( tu.cs->pcv->rectCUs ? 
prefixCtx[log2Size] : 3*(log2Size-2)+((log2Size-1)>>2) ) : 0 ); + uint32_t sumFBits = 0; /* accumulated intBits[1] cost of the prefix bins already passed */ + unsigned maxCtxId = g_uiGroupIdx[ size - 1 ]; + for( unsigned ctxId = 0; ctxId < maxCtxId; ctxId++ ) + { + const BinFracBits bits = fracBitsAccess.getFracBitsArray( ctxSetLast( lastOffset + ( ctxId >> lastShift ) ) ); + ctxBits[ ctxId ] = sumFBits + bits.intBits[0] + ( ctxId>3 ? ((ctxId-2)>>1)<<SCALE_BITS : 0 ) + bitOffset; + sumFBits += bits.intBits[1]; + } + ctxBits [ maxCtxId ] = sumFBits + ( maxCtxId>3 ? ((maxCtxId-2)>>1)<<SCALE_BITS : 0 ) + bitOffset; /* the last group has no terminating intBits[0] term */ + for( unsigned pos = 0; pos < size; pos++ ) + { + lastBits[ pos ] = ctxBits[ g_uiGroupIdx[ pos ] ]; + } + } + } + + void RateEstimator::xSetSigSbbFracBits( const FracBitsAccess& fracBitsAccess, ChannelType chType ) /* Copies the sm_maxNumSigSbbCtx cost entries of Ctx::SigCoeffGroup[chType]. */ + { + const CtxSet& ctxSet = Ctx::SigCoeffGroup[ chType ]; + for( unsigned ctxId = 0; ctxId < sm_maxNumSigSbbCtx; ctxId++ ) + { + m_sigSbbFracBits[ ctxId ] = fracBitsAccess.getFracBitsArray( ctxSet( ctxId ) ); + } + } + + void RateEstimator::xSetSigFlagBits( const FracBitsAccess& fracBitsAccess, ChannelType chType ) /* For each of the sm_numCtxSetsSig sets, copies 18 (luma) or 12 (chroma) cost entries from Ctx::SigFlag[chType + 2*ctxSetId]. */ + { + for( unsigned ctxSetId = 0; ctxSetId < sm_numCtxSetsSig; ctxSetId++ ) + { + BinFracBits* bits = m_sigFracBits [ ctxSetId ]; + const CtxSet& ctxSet = Ctx::SigFlag [ chType + 2*ctxSetId ]; + const unsigned numCtx = ( chType == CHANNEL_TYPE_LUMA ? 18 : 12 ); + for( unsigned ctxId = 0; ctxId < numCtx; ctxId++ ) + { + bits[ ctxId ] = fracBitsAccess.getFracBitsArray( ctxSet( ctxId ) ); + } + } + } + + void RateEstimator::xSetGtxFlagBits( const FracBitsAccess& fracBitsAccess, ChannelType chType ) /* Combines the parity, gt1 and gt2 flag costs of each context into the six entries of CoeffFracBits::bits. */ + { + const CtxSet& ctxSetPar = Ctx::ParFlag [ chType ]; + const CtxSet& ctxSetGt1 = Ctx::GtxFlag [ 2 + chType ]; + const CtxSet& ctxSetGt2 = Ctx::GtxFlag [ chType ]; + const unsigned numCtx = ( chType == CHANNEL_TYPE_LUMA ? 
21 : 11 ); + for( unsigned ctxId = 0; ctxId < numCtx; ctxId++ ) + { + BinFracBits fbPar = fracBitsAccess.getFracBitsArray( ctxSetPar( ctxId ) ); + BinFracBits fbGt1 = fracBitsAccess.getFracBitsArray( ctxSetGt1( ctxId ) ); + BinFracBits fbGt2 = fracBitsAccess.getFracBitsArray( ctxSetGt2( ctxId ) ); + CoeffFracBits& cb = m_gtxFracBits[ ctxId ]; + int32_t par0 = (1<<SCALE_BITS) + int32_t(fbPar.intBits[0]); + int32_t par1 = (1<<SCALE_BITS) + int32_t(fbPar.intBits[1]); /* par0 / par1: parity flag cost plus a constant (1 << SCALE_BITS); NOTE(review): the constant is presumably the bypass-coded sign bit, confirm */ + cb.bits[0] = 0; + cb.bits[1] = fbGt1.intBits[0] + (1 << SCALE_BITS); /* gt1 = 0: no parity or gt2 flag is costed */ + cb.bits[2] = fbGt1.intBits[1] + par0 + fbGt2.intBits[0]; /* gt1 = 1, parity 0, gt2 = 0 */ + cb.bits[3] = fbGt1.intBits[1] + par1 + fbGt2.intBits[0]; /* gt1 = 1, parity 1, gt2 = 0 */ + cb.bits[4] = fbGt1.intBits[1] + par0 + fbGt2.intBits[1]; /* gt1 = 1, parity 0, gt2 = 1 */ + cb.bits[5] = fbGt1.intBits[1] + par1 + fbGt2.intBits[1]; /* gt1 = 1, parity 1, gt2 = 1 */ + } + } + +#else class Rom { @@ -463,15 +956,23 @@ namespace DQIntern int32_t par0 = (1<<SCALE_BITS) + int32_t(fbPar.intBits[0]); int32_t par1 = (1<<SCALE_BITS) + int32_t(fbPar.intBits[1]); cb.bits[0] = 0; +#if JVET_L0274 /* same six-entry layout as in the speed-up path: the gt1 flag is costed first and parity only when gt1 = 1 */ + cb.bits[1] = fbGt1.intBits[0] + (1 << SCALE_BITS); + cb.bits[2] = fbGt1.intBits[1] + par0 + fbGt2.intBits[0]; + cb.bits[3] = fbGt1.intBits[1] + par1 + fbGt2.intBits[0]; + cb.bits[4] = fbGt1.intBits[1] + par0 + fbGt2.intBits[1]; + cb.bits[5] = fbGt1.intBits[1] + par1 + fbGt2.intBits[1]; +#else cb.bits[1] = par0 + fbGt1.intBits[0]; cb.bits[2] = par1 + fbGt1.intBits[0]; cb.bits[3] = par0 + fbGt1.intBits[1] + fbGt2.intBits[0]; cb.bits[4] = par1 + fbGt1.intBits[1] + fbGt2.intBits[0]; cb.bits[5] = par0 + fbGt1.intBits[1] + fbGt2.intBits[1]; cb.bits[6] = par1 + fbGt1.intBits[1] + fbGt2.intBits[1]; +#endif } } - +#endif @@ -482,6 +983,8 @@ namespace DQIntern /*===== =====*/ /*================================================================================*/ +#if JVET_L0274_ENCODER_SPEED_UP /* with the speed-up enabled, ScanPosType and ScanInfo are the versions defined earlier in this file, so the definitions below are compiled only in the #else branch */ +#else enum ScanPosType { SCAN_ISCSBB = 0, SCAN_SOCSBB = 1, SCAN_EOCSBB = 2 }; struct ScanInfo @@ -496,10 +999,15 @@ namespace DQIntern int insidePos; int nextInsidePos; NbInfoSbb nextNbInfoSbb; +#if 
JVET_L0274 + bool eosbb; + ScanPosType spt; /* replaces the sosbb / socsbb / eocsbb booleans of the #else branch */ +#else bool sosbb; bool eosbb; bool socsbb; bool eocsbb; +#endif int sbbPos; int nextSbbRight; int nextSbbBelow; @@ -536,10 +1044,19 @@ namespace DQIntern sbbPos = m_rateEst.sbbPos ( scanIdx ); lastOffset = m_rateEst.lastOffset ( scanIdx ); insidePos = scanIdx & m_sbbMask; +#if JVET_L0274 + eosbb = ( insidePos == 0 ); + spt = SCAN_ISCSBB; /* same conditions as the socsbb / eocsbb booleans of the #else branch, folded into one enum value */ + if( insidePos == m_sbbMask && scanIdx > sbbSize && scanIdx < m_numCoeffMinus1 ) + spt = SCAN_SOCSBB; + else if( eosbb && scanIdx > 0 && scanIdx < m_numCoeffMinusSbb ) + spt = SCAN_EOCSBB; +#else sosbb = ( insidePos == m_sbbMask ); eosbb = ( insidePos == 0 ); socsbb = ( sosbb && scanIdx > sbbSize && scanIdx < m_numCoeffMinus1 ); eocsbb = ( eosbb && scanIdx > 0 && scanIdx < m_numCoeffMinusSbb ); +#endif if( scanIdx ) { const int nextScanIdx = scanIdx - 1; @@ -576,7 +1093,7 @@ namespace DQIntern const int m_numCoeffMinus1; const int m_numCoeffMinusSbb; }; - +#endif struct PQData { @@ -823,6 +1340,21 @@ namespace DQIntern inline void swap() { std::swap(m_currSbbCtx, m_prevSbbCtx); } +#if JVET_L0274_ENCODER_SPEED_UP + inline void reset( const TUParameters& tuPars, const RateEstimator &rateEst) /* Takes the outside-neighbour table from tuPars, copies the two sub-block flag costs from rateEst, and points the eight m_allSbbCtx entries at consecutive chunks of m_memory; each chunk holds numSbb sub-block flags followed by m_numCoeff levels. */ + { + m_nbInfo = tuPars.m_scanId2NbInfoOut; + ::memcpy( m_sbbFlagBits, rateEst.sigSbbFracBits(), 2*sizeof(BinFracBits) ); + const int numSbb = tuPars.m_numSbb; + const int chunkSize = numSbb + tuPars.m_numCoeff; + uint8_t* nextMem = m_memory; + for( int k = 0; k < 8; k++, nextMem += chunkSize ) + { + m_allSbbCtx[k].sbbFlags = nextMem; + m_allSbbCtx[k].levels = nextMem + numSbb; + } + } +#else inline void reset(const RateEstimator &rateEst) { m_nbInfo = rateEst.nbInfoOut(); @@ -836,6 +1368,7 @@ namespace DQIntern m_allSbbCtx[k].levels = nextMem + numSbb; } } +#endif inline void update(const ScanInfo &scanInfo, const State *prevState, State &currState); @@ -848,6 +1381,16 @@ namespace DQIntern uint8_t m_memory[ 8 * ( MAX_TU_SIZE * MAX_TU_SIZE + MLS_GRP_NUM ) ]; }; +#if JVET_L0274 +#define RICEMAX 32 + 
const int32_t g_goRiceBits[4][RICEMAX] = + { + { 32768, 65536, 98304, 131072, 163840, 196608, 229376, 294912, 294912, 360448, 360448, 360448, 360448, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520 }, + { 65536, 65536, 98304, 98304, 131072, 131072, 163840, 163840, 196608, 196608, 229376, 229376, 294912, 294912, 294912, 294912, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984 }, + { 98304, 98304, 98304, 98304, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 262144, 262144, 262144, 262144, 294912, 294912, 294912, 294912, 360448, 360448, 360448, 360448 }, + { 131072, 131072, 131072, 131072, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376 } + }; +#endif class State { @@ -864,12 +1407,90 @@ namespace DQIntern { m_rdCost = std::numeric_limits<int64_t>::max()>>1; m_numSigSbb = 0; +#if JVET_L0274 + m_remRegBins = 3; // just large enough for last scan pos +#endif m_refSbbCtxId = -1; m_sigFracBits = m_sigFracBitsArray[ 0 ]; m_coeffFracBits = m_gtxFracBitsArray[ 0 ]; m_goRicePar = 0; +#if JVET_L0274 + m_goRiceZero = 0; +#endif } +#if JVET_L0274 + void checkRdCosts( const ScanPosType spt, const PQData& pqDataA, const PQData& pqDataB, Decision& decisionA, Decision& decisionB) const + { + const int32_t* goRiceTab = g_goRiceBits[m_goRicePar]; + int64_t rdCostA = m_rdCost + pqDataA.deltaDist; + int64_t rdCostB = m_rdCost + pqDataB.deltaDist; + int64_t rdCostZ = m_rdCost; + if( m_remRegBins >= 3 ) + { + if( pqDataA.absLevel < 4 ) + rdCostA += m_coeffFracBits.bits[pqDataA.absLevel]; + else + { + const unsigned value = (pqDataA.absLevel - 4) >> 1; + 
rdCostA += m_coeffFracBits.bits[pqDataA.absLevel - (value << 1)] + goRiceTab[value<RICEMAX ? value : RICEMAX-1]; + } + if( pqDataB.absLevel < 4 ) + rdCostB += m_coeffFracBits.bits[pqDataB.absLevel]; + else + { + const unsigned value = (pqDataB.absLevel - 4) >> 1; + rdCostB += m_coeffFracBits.bits[pqDataB.absLevel - (value << 1)] + goRiceTab[value<RICEMAX ? value : RICEMAX-1]; + } + if( spt == SCAN_ISCSBB ) + { + rdCostA += m_sigFracBits.intBits[1]; + rdCostB += m_sigFracBits.intBits[1]; + rdCostZ += m_sigFracBits.intBits[0]; + } + else if( spt == SCAN_SOCSBB ) + { + rdCostA += m_sbbFracBits.intBits[1] + m_sigFracBits.intBits[1]; + rdCostB += m_sbbFracBits.intBits[1] + m_sigFracBits.intBits[1]; + rdCostZ += m_sbbFracBits.intBits[1] + m_sigFracBits.intBits[0]; + } + else if( m_numSigSbb ) + { + rdCostA += m_sigFracBits.intBits[1]; + rdCostB += m_sigFracBits.intBits[1]; + rdCostZ += m_sigFracBits.intBits[0]; + } + else + { + rdCostZ = decisionA.rdCost; + } + } + else + { + rdCostA += (1 << SCALE_BITS) + goRiceTab[pqDataA.absLevel <= m_goRiceZero ? pqDataA.absLevel - 1 : (pqDataA.absLevel<RICEMAX ? pqDataA.absLevel : RICEMAX-1)]; + rdCostB += (1 << SCALE_BITS) + goRiceTab[pqDataB.absLevel <= m_goRiceZero ? pqDataB.absLevel - 1 : (pqDataB.absLevel<RICEMAX ? 
pqDataB.absLevel : RICEMAX-1)]; + rdCostZ += goRiceTab[m_goRiceZero]; + } + if( rdCostA < decisionA.rdCost ) + { + decisionA.rdCost = rdCostA; + decisionA.absLevel = pqDataA.absLevel; + decisionA.prevId = m_stateId; + } + if( rdCostZ < decisionA.rdCost ) + { + decisionA.rdCost = rdCostZ; + decisionA.absLevel = 0; + decisionA.prevId = m_stateId; + } + if( rdCostB < decisionB.rdCost ) + { + decisionB.rdCost = rdCostB; + decisionB.absLevel = pqDataB.absLevel; + decisionB.prevId = m_stateId; + } + } +#else template<ScanPosType spt> inline void checkRdCostZero(Decision &decision) const { int64_t rdCost = m_rdCost; @@ -943,10 +1564,24 @@ namespace DQIntern decision.prevId = m_stateId; } } +#endif inline void checkRdCostStart(int32_t lastOffset, const PQData &pqData, Decision &decision) const { +#if JVET_L0274 + int64_t rdCost = pqData.deltaDist + lastOffset; + if (pqData.absLevel < 4) + { + rdCost += m_coeffFracBits.bits[pqData.absLevel]; + } + else + { + const unsigned value = (pqData.absLevel - 4) >> 1; + rdCost += m_coeffFracBits.bits[pqData.absLevel - (value << 1)] + g_goRiceBits[m_goRicePar][value < RICEMAX ? 
value : RICEMAX-1]; + } +#else int64_t rdCost = pqData.deltaDist + lastOffset + getLevelBits( pqData.absLevel ); +#endif if( rdCost < decision.rdCost ) { decision.rdCost = rdCost; @@ -969,15 +1604,30 @@ namespace DQIntern private: int64_t m_rdCost; uint16_t m_absLevelsAndCtxInit[24]; // 16x8bit for abs levels + 16x16bit for ctx init id +#if JVET_L0274 + int8_t m_numSigSbb; + int8_t m_remRegBins; + int8_t m_refSbbCtxId; +#else int32_t m_numSigSbb; int32_t m_refSbbCtxId; +#endif BinFracBits m_sbbFracBits; BinFracBits m_sigFracBits; CoeffFracBits m_coeffFracBits; +#if JVET_L0274 + int8_t m_goRicePar; + int8_t m_goRiceZero; + const int8_t m_stateId; +#else int m_goRicePar; const int m_stateId; +#endif const BinFracBits*const m_sigFracBitsArray; const CoeffFracBits*const m_gtxFracBitsArray; +#if JVET_L0274 + const uint32_t*const m_goRiceZeroArray; +#endif CommonCtx& m_commonCtx; }; @@ -987,6 +1637,9 @@ namespace DQIntern , m_stateId ( stateId ) , m_sigFracBitsArray( rateEst.sigFlagBits(stateId) ) , m_gtxFracBitsArray( rateEst.gtxFracBits(stateId) ) +#if JVET_L0274 + , m_goRiceZeroArray ( g_auiGoRicePosCoeff0[std::max(0,stateId-1)] ) +#endif , m_commonCtx ( commonCtx ) { } @@ -1003,18 +1656,124 @@ namespace DQIntern m_numSigSbb = prvState->m_numSigSbb + !!decision.absLevel; m_refSbbCtxId = prvState->m_refSbbCtxId; m_sbbFracBits = prvState->m_sbbFracBits; +#if JVET_L0274 + m_remRegBins = prvState->m_remRegBins - 1; + m_goRicePar = prvState->m_goRicePar; + if( m_remRegBins >= 3 ) + { + TCoeff rem = (decision.absLevel - 4) >> 1; + if( m_goRicePar < 3 && rem > (3<<m_goRicePar)-1 ) + { + m_goRicePar++; + } + m_remRegBins -= std::min<TCoeff>( decision.absLevel, 2 ); + } +#endif ::memcpy( m_absLevelsAndCtxInit, prvState->m_absLevelsAndCtxInit, 48*sizeof(uint8_t) ); } else { m_numSigSbb = 1; m_refSbbCtxId = -1; +#if JVET_L0274 + if ( scanInfo.sbbSize == 4 ) + { + m_remRegBins = MAX_NUM_REG_BINS_2x2SUBBLOCK - MAX_NUM_GT2_BINS_2x2SUBBLOCK - std::min<TCoeff>( decision.absLevel, 2 
); + } + else + { + m_remRegBins = MAX_NUM_REG_BINS_4x4SUBBLOCK - MAX_NUM_GT2_BINS_4x4SUBBLOCK - std::min<TCoeff>( decision.absLevel, 2 ); + } + m_goRicePar = ( ((decision.absLevel - 4) >> 1) > (3<<0)-1 ? 1 : 0 ); +#endif ::memset( m_absLevelsAndCtxInit, 0, 48*sizeof(uint8_t) ); } uint8_t* levels = reinterpret_cast<uint8_t*>(m_absLevelsAndCtxInit); levels[ scanInfo.insidePos ] = (uint8_t)std::min<TCoeff>( 255, decision.absLevel ); +#if JVET_L0274 + if (m_remRegBins >= 3) + { + TCoeff tinit = m_absLevelsAndCtxInit[8 + scanInfo.nextInsidePos]; + TCoeff sumAbs1 = (tinit >> 3) & 31; + TCoeff sumNum = tinit & 7; +#define UPDATE(k) {TCoeff t=levels[scanInfo.nextNbInfoSbb.inPos[k]]; sumAbs1+=std::min<TCoeff>(2+(t&1),t); sumNum+=!!t; } + if (numIPos == 1) + { + UPDATE(0); + } + else if (numIPos == 2) + { + UPDATE(0); + UPDATE(1); + } + else if (numIPos == 3) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + } + else if (numIPos == 4) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + UPDATE(3); + } + else if (numIPos == 5) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + UPDATE(3); + UPDATE(4); + } +#undef UPDATE + TCoeff sumGt1 = sumAbs1 - sumNum; + m_sigFracBits = m_sigFracBitsArray[scanInfo.sigCtxOffsetNext + (sumAbs1 < 5 ? sumAbs1 : 5)]; + m_coeffFracBits = m_gtxFracBitsArray[scanInfo.gtxCtxOffsetNext + (sumGt1 < 4 ? 
sumGt1 : 4)]; + } + else + { + TCoeff sumAbs = m_absLevelsAndCtxInit[8 + scanInfo.nextInsidePos] >> 8; +#define UPDATE(k) {TCoeff t=levels[scanInfo.nextNbInfoSbb.inPos[k]]; sumAbs+=t; } + if (numIPos == 1) + { + UPDATE(0); + } + else if (numIPos == 2) + { + UPDATE(0); + UPDATE(1); + } + else if (numIPos == 3) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + } + else if (numIPos == 4) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + UPDATE(3); + } + else if (numIPos == 5) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + UPDATE(3); + UPDATE(4); + } +#undef UPDATE + sumAbs = std::min(31, sumAbs); + m_goRicePar = g_auiGoRiceParsCoeff[sumAbs]; + m_goRiceZero = m_goRiceZeroArray[sumAbs]; + } +#else TCoeff tinit = m_absLevelsAndCtxInit[ 8 + scanInfo.nextInsidePos ]; TCoeff sumAbs = tinit >> 8; TCoeff sumAbs1 = ( tinit >> 3 ) & 31; @@ -1056,6 +1815,7 @@ namespace DQIntern m_sigFracBits = m_sigFracBitsArray[ scanInfo.sigCtxOffsetNext + ( sumAbs1 < 5 ? sumAbs1 : 5 ) ]; m_coeffFracBits = m_gtxFracBitsArray[ scanInfo.gtxCtxOffsetNext + ( sumGt1 < 4 ? sumGt1 : 4 ) ]; m_goRicePar = g_auiGoRicePars [ sumAbs < 31 ? sumAbs : 31 ]; +#endif } } @@ -1084,11 +1844,17 @@ namespace DQIntern TCoeff tinit = m_absLevelsAndCtxInit[ 8 + scanInfo.nextInsidePos ]; TCoeff sumNum = tinit & 7; TCoeff sumAbs1 = ( tinit >> 3 ) & 31; +#if JVET_L0274 +#else TCoeff sumAbs = ( tinit >> 8 ) - sumNum; +#endif TCoeff sumGt1 = sumAbs1 - sumNum; m_sigFracBits = m_sigFracBitsArray[ scanInfo.sigCtxOffsetNext + ( sumAbs1 < 5 ? sumAbs1 : 5 ) ]; m_coeffFracBits = m_gtxFracBitsArray[ scanInfo.gtxCtxOffsetNext + ( sumGt1 < 4 ? sumGt1 : 4 ) ]; +#if JVET_L0274 +#else m_goRicePar = g_auiGoRicePars [ sumAbs < 31 ? sumAbs : 31 ]; +#endif } } @@ -1112,6 +1878,17 @@ namespace DQIntern const int sigNSbb = ( ( scanInfo.nextSbbRight ? sbbFlags[ scanInfo.nextSbbRight ] : false ) || ( scanInfo.nextSbbBelow ? sbbFlags[ scanInfo.nextSbbBelow ] : false ) ? 
1 : 0 ); currState.m_numSigSbb = 0; +#if JVET_L0274 + if (scanInfo.sbbSize == 4) + { + currState.m_remRegBins = MAX_NUM_REG_BINS_2x2SUBBLOCK - MAX_NUM_GT2_BINS_2x2SUBBLOCK; + } + else + { + currState.m_remRegBins = MAX_NUM_REG_BINS_4x4SUBBLOCK - MAX_NUM_GT2_BINS_4x4SUBBLOCK; + } +#endif + currState.m_goRicePar = 0; currState.m_refSbbCtxId = currState.m_stateId; currState.m_sbbFracBits = m_sbbFlagBits[ sigNSbb ]; @@ -1124,7 +1901,11 @@ namespace DQIntern if( nbOut->num ) { TCoeff sumAbs = 0, sumAbs1 = 0, sumNum = 0; +#if JVET_L0274 +#define UPDATE(k) {TCoeff t=absLevels[nbOut->outPos[k]]; sumAbs+=t; sumAbs1+=std::min<TCoeff>(2+(t&1),t); sumNum+=!!t; } +#else #define UPDATE(k) {TCoeff t=absLevels[nbOut->outPos[k]]; sumAbs+=t; sumAbs1+=std::min<TCoeff>(4-(t&1),t); sumNum+=!!t; } +#endif UPDATE(0); if( nbOut->num > 1 ) { @@ -1171,8 +1952,12 @@ namespace DQIntern private: void xDecideAndUpdate ( const TCoeff absCoeff, const ScanInfo& scanInfo ); +#if JVET_L0274 + void xDecide ( const ScanPosType spt, const TCoeff absCoeff, const int lastOffset, Decision* decisions ); +#else template<ScanPosType spt> void xDecide ( const TCoeff absCoeff, int32_t lastOffset, Decision* decisions ); +#endif private: CommonCtx m_commonCtx; @@ -1210,13 +1995,23 @@ namespace DQIntern #undef DINIT +#if JVET_L0274 + void DepQuant::xDecide( const ScanPosType spt, const TCoeff absCoeff, const int lastOffset, Decision* decisions) +#else template<ScanPosType spt> void DepQuant::xDecide( const TCoeff absCoeff, int32_t lastOffset, Decision* decisions ) +#endif { ::memcpy( decisions, startDec, 8*sizeof(Decision) ); PQData pqData[4]; m_quant.preQuantCoeff( absCoeff, pqData ); +#if JVET_L0274 + m_prevStates[0].checkRdCosts( spt, pqData[0], pqData[2], decisions[0], decisions[2]); + m_prevStates[1].checkRdCosts( spt, pqData[0], pqData[2], decisions[2], decisions[0]); + m_prevStates[2].checkRdCosts( spt, pqData[3], pqData[1], decisions[1], decisions[3]); + m_prevStates[3].checkRdCosts( spt, pqData[3], 
pqData[1], decisions[3], decisions[1]); +#else m_prevStates[0].checkRdCostNonZero<spt> ( pqData[0], decisions[0] ); m_prevStates[0].checkRdCostNonZero<spt> ( pqData[2], decisions[2] ); m_prevStates[0].checkRdCostZero<spt> ( decisions[0] ); @@ -1229,6 +2024,7 @@ namespace DQIntern m_prevStates[3].checkRdCostNonZero<spt> ( pqData[1], decisions[1] ); m_prevStates[3].checkRdCostNonZero<spt> ( pqData[3], decisions[3] ); m_prevStates[3].checkRdCostZero<spt> ( decisions[3] ); +#endif if( spt==SCAN_EOCSBB ) { m_skipStates[0].checkRdCostSkipSbb( decisions[0] ); @@ -1246,9 +2042,13 @@ namespace DQIntern std::swap( m_prevStates, m_currStates ); +#if JVET_L0274 + xDecide( scanInfo.spt, absCoeff, lastOffset(scanInfo.scanIdx), decisions); +#else if ( scanInfo.socsbb ) { xDecide<SCAN_SOCSBB>( absCoeff, scanInfo.lastOffset, decisions ); } else if( scanInfo.eocsbb ) { xDecide<SCAN_EOCSBB>( absCoeff, scanInfo.lastOffset, decisions ); } else { xDecide<SCAN_ISCSBB>( absCoeff, scanInfo.lastOffset, decisions ); } +#endif if( scanInfo.scanIdx ) { @@ -1303,7 +2103,11 @@ namespace DQIntern } } +#if JVET_L0274 + if( scanInfo.spt == SCAN_SOCSBB ) +#else if( scanInfo.socsbb ) +#endif { std::swap( m_prevStates, m_skipStates ); } @@ -1313,8 +2117,16 @@ namespace DQIntern void DepQuant::quant( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum ) { +#if JVET_L0274 + CHECKD( tu.cs->sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag(), "ext precision is not supported" ); +#endif + //===== reset / pre-init ===== +#if JVET_L0274_ENCODER_SPEED_UP + const TUParameters& tuPars = *g_Rom.getTUPars( tu.blocks[compID], compID ); +#else RateEstimator::initBlock ( tu, compID ); +#endif m_quant.initQuantBlock ( tu, compID, cQP, lambda ); TCoeff* qCoeff = tu.getCoeffs( compID ).buf; const TCoeff* tCoeff = srcCoeff.buf; @@ -1327,7 +2139,11 @@ namespace DQIntern const TCoeff thres = m_quant.getLastThreshold(); 
for( ; firstTestPos >= 0; firstTestPos-- ) { +#if JVET_L0274_ENCODER_SPEED_UP + if( abs( tCoeff[ tuPars.m_scanId2BlkPos[firstTestPos] ] ) > thres ) +#else if( abs( tCoeff[ rasterPos(firstTestPos) ] ) > thres ) +#endif { break; } @@ -1338,8 +2154,13 @@ namespace DQIntern } //===== real init ===== +#if JVET_L0274_ENCODER_SPEED_UP + RateEstimator::initCtx( tuPars, tu, compID, ctx.getFracBitsAcess() ); + m_commonCtx.reset( tuPars, *this ); +#else RateEstimator::initCtx( tu, ctx.getFracBitsAcess() ); m_commonCtx.reset( *this ); +#endif for( int k = 0; k < 12; k++ ) { m_allStates[k].init(); @@ -1348,10 +2169,18 @@ namespace DQIntern //===== populate trellis ===== +#if JVET_L0274_ENCODER_SPEED_UP + for( int scanIdx = firstTestPos; scanIdx >= 0; scanIdx-- ) + { + const ScanInfo& scanInfo = tuPars.m_scanInfo[ scanIdx ]; + xDecideAndUpdate( abs( tCoeff[ scanInfo.rasterPos ] ), scanInfo ); + } +#else for( ScanData scanData(*this,firstTestPos); scanData.valid(); scanData.next() ) { xDecideAndUpdate( abs( tCoeff[ scanData.rasterPos ] ), scanData ); } +#endif //===== find best path ===== Decision decision = { std::numeric_limits<int64_t>::max(), -1, -2 }; @@ -1371,7 +2200,11 @@ namespace DQIntern for( ; decision.prevId >= 0; scanIdx++ ) { decision = m_trellis[ scanIdx ][ decision.prevId ]; +#if JVET_L0274_ENCODER_SPEED_UP + int32_t blkpos = tuPars.m_scanId2BlkPos[ scanIdx ]; +#else int32_t blkpos = rasterPos( scanIdx ); +#endif qCoeff[ blkpos ] = ( tCoeff[ blkpos ] < 0 ? 
-decision.absLevel : decision.absLevel ); absSum += decision.absLevel; } diff --git a/source/Lib/CommonLib/QuantRDOQ.cpp b/source/Lib/CommonLib/QuantRDOQ.cpp index bae190c979f639f13148e00ecee72d8e2397dc19..9105165a1a70b51acc63f67c59330671df67fa90 100644 --- a/source/Lib/CommonLib/QuantRDOQ.cpp +++ b/source/Lib/CommonLib/QuantRDOQ.cpp @@ -112,6 +112,11 @@ inline uint32_t QuantRDOQ::xGetCodedLevel( double& rd64CodedCost, const BinFracBits& fracBitsPar, const BinFracBits& fracBitsGt1, const BinFracBits& fracBitsGt2, +#if JVET_L0274 + const int remGt2Bins, + const int remRegBins, + unsigned goRiceZero, +#endif uint16_t ui16AbsGoRice, int iQBits, double errorScale, @@ -146,7 +151,11 @@ inline uint32_t QuantRDOQ::xGetCodedLevel( double& rd64CodedCost, for( int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- ) { double dErr = double( lLevelDouble - ( Intermediate_Int(uiAbsLevel) << iQBits ) ); +#if JVET_L0274 + double dCurrCost = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, ui16AbsGoRice, useLimitedPrefixLength, maxLog2TrDynamicRange ) ); +#else double dCurrCost = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, ui16AbsGoRice, useLimitedPrefixLength, maxLog2TrDynamicRange ) ); +#endif dCurrCost += dCurrCostSig; if( dCurrCost < rd64CodedCost ) @@ -175,15 +184,68 @@ inline int QuantRDOQ::xGetICRate( const uint32_t uiAbsLevel, const BinFracBits& fracBitsPar, const BinFracBits& fracBitsGt1, const BinFracBits& fracBitsGt2, +#if JVET_L0274 + const int remGt2Bins, + const int remRegBins, + unsigned goRiceZero, +#endif const uint16_t ui16AbsGoRice, const bool useLimitedPrefixLength, const int maxLog2TrDynamicRange ) const { +#if JVET_L0274 + if( remRegBins < 3 ) + { + int iRate = int( xGetIEPRate() ); // cost of sign bit + uint32_t symbol = ( uiAbsLevel == 0 ? goRiceZero : uiAbsLevel <= goRiceZero ? 
uiAbsLevel-1 : uiAbsLevel ); + uint32_t length; + const int threshold = g_auiGoRiceRange[ui16AbsGoRice]; + if( symbol < ( threshold << ui16AbsGoRice ) ) + { + length = symbol >> ui16AbsGoRice; + iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS; + } + else if( useLimitedPrefixLength ) + { + const uint32_t maximumPrefixLength = ( 32 - ( COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange ) ); + + uint32_t prefixLength = 0; + uint32_t suffix = ( symbol >> ui16AbsGoRice ) - COEF_REMAIN_BIN_REDUCTION; + + while( ( prefixLength < maximumPrefixLength ) && ( suffix > ( ( 2 << prefixLength ) - 2 ) ) ) + { + prefixLength++; + } + + const uint32_t suffixLength = ( prefixLength == maximumPrefixLength ) ? ( maxLog2TrDynamicRange - ui16AbsGoRice ) : ( prefixLength + 1/*separator*/ ); + + iRate += ( COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ui16AbsGoRice ) << SCALE_BITS; + } + else + { + length = ui16AbsGoRice; + symbol = symbol - ( threshold << ui16AbsGoRice ); + while( symbol >= ( 1 << length ) ) + { + symbol -= ( 1 << ( length++ ) ); + } + iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS; + } + return iRate; + } + + int iRate = int( xGetIEPRate() ); // cost of sign bit + const uint32_t cthres = ( remGt2Bins ? 
4 : 2 ); + if( uiAbsLevel >= cthres ) + { + uint32_t symbol = ( uiAbsLevel - cthres ) >> 1; +#else int iRate = int( xGetIEPRate() ); // cost of sign bit if( uiAbsLevel >= 5 ) { uint32_t symbol = ( uiAbsLevel - 5 ) >> 1; +#endif uint32_t length; const int threshold = g_auiGoRiceRange[ui16AbsGoRice]; if( symbol < ( threshold << ui16AbsGoRice ) ) @@ -218,32 +280,60 @@ inline int QuantRDOQ::xGetICRate( const uint32_t uiAbsLevel, iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS; } +#if JVET_L0274 + iRate += fracBitsGt1.intBits[1]; + iRate += fracBitsPar.intBits[( uiAbsLevel - 2 ) & 1]; + if( remGt2Bins ) + { + iRate += fracBitsGt2.intBits[1]; + } +#else iRate += fracBitsPar.intBits[( uiAbsLevel - 1 ) & 1]; iRate += fracBitsGt1.intBits[1]; iRate += fracBitsGt2.intBits[1]; +#endif } else if( uiAbsLevel == 1 ) { +#if JVET_L0274 + iRate += fracBitsGt1.intBits[0]; +#else iRate += fracBitsPar.intBits[0]; iRate += fracBitsGt1.intBits[0]; +#endif } else if( uiAbsLevel == 2 ) { +#if JVET_L0274 + iRate += fracBitsGt1.intBits[1]; + iRate += fracBitsPar.intBits[0]; + iRate += fracBitsGt2.intBits[0]; +#else iRate += fracBitsPar.intBits[1]; iRate += fracBitsGt1.intBits[0]; +#endif } else if( uiAbsLevel == 3 ) { +#if JVET_L0274 + iRate += fracBitsGt1.intBits[1]; + iRate += fracBitsPar.intBits[1]; + iRate += fracBitsGt2.intBits[0]; +#else iRate += fracBitsPar.intBits[0]; iRate += fracBitsGt1.intBits[1]; iRate += fracBitsGt2.intBits[0]; +#endif } +#if JVET_L0274 +#else else if( uiAbsLevel == 4 ) { iRate += fracBitsPar.intBits[1]; iRate += fracBitsGt1.intBits[1]; iRate += fracBitsGt2.intBits[0]; } +#endif else { iRate = 0; @@ -641,6 +731,12 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, double d64BaseCost = 0; int iLastScanPos = -1; +#if JVET_L0274 + bool is2x2subblock = ( iCGSizeM1 == 3 ); + int remGt2Bins = ( is2x2subblock ? MAX_NUM_GT2_BINS_2x2SUBBLOCK : MAX_NUM_GT2_BINS_4x4SUBBLOCK ); + int remRegBins = ( is2x2subblock ? 
MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ) - remGt2Bins; + uint32_t goRiceParam = 0; +#endif double *pdCostCoeffGroupSig = m_pdCostCoeffGroupSig; memset( pdCostCoeffGroupSig, 0, ( uiMaxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) ); @@ -714,7 +810,17 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, uint32_t uiParCtx = cctx.parityCtxIdAbs ( ctxOffset ); uint32_t uiGt1Ctx = cctx.greater1CtxIdAbs ( ctxOffset ); uint32_t uiGt2Ctx = cctx.greater2CtxIdAbs ( ctxOffset ); +#if JVET_L0274 + uint32_t goRiceZero = 0; + if( remRegBins < 3 ) + { + unsigned sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff ); + goRiceParam = g_auiGoRiceParsCoeff [ sumAbs ]; + goRiceZero = g_auiGoRicePosCoeff0[0][ sumAbs ]; + } +#else uint32_t uiGoRiceParam = cctx.GoRiceParAbs ( iScanPos, piDstCoeff ); +#endif const BinFracBits fracBitsPar = fracBits.getFracBitsArray( uiParCtx ); const BinFracBits fracBitsGt1 = fracBits.getFracBitsArray( uiGt1Ctx ); @@ -722,18 +828,31 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, if( iScanPos == iLastScanPos ) { +#if JVET_L0274 + uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], + lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange ); +#else uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, uiGoRiceParam, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange ); +#endif } else { DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig ); const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig ); +#if JVET_L0274 + uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos 
], + lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange ); +#if HEVC_USE_SIGN_HIDING + sigRateDelta[ uiBlkPos ] = ( remRegBins < 3 ? 0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] ); +#endif +#else uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, uiGoRiceParam, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange ); #if HEVC_USE_SIGN_HIDING sigRateDelta[ uiBlkPos ] = fracBitsSig.intBits[1] - fracBitsSig.intBits[0]; +#endif #endif } @@ -746,18 +865,57 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, if( uiLevel > 0 ) { +#if JVET_L0274 + int rateNow = xGetICRate( uiLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ); + rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; + rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; +#else int rateNow = xGetICRate( uiLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, uiGoRiceParam, extendedPrecision, maxLog2TrDynamicRange ); rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, uiGoRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, uiGoRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; +#endif } else // uiLevel == 0 { +#if JVET_L0274 + if( remRegBins < 3 ) + { + int rateNow = xGetICRate( uiLevel, 
fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ); + rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remGt2Bins, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; + } + else + { + rateIncUp [ uiBlkPos ] = fracBitsGt1.intBits[ 0 ]; + } +#else rateIncUp [ uiBlkPos ] = fracBitsPar.intBits[ 0 ] + fracBitsGt1.intBits[ 0 ]; +#endif } #endif piDstCoeff[ uiBlkPos ] = uiLevel; d64BaseCost += pdCostCoeff [ iScanPos ]; +#if JVET_L0274 + if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) ) + { + remGt2Bins = ( is2x2subblock ? MAX_NUM_GT2_BINS_2x2SUBBLOCK : MAX_NUM_GT2_BINS_4x4SUBBLOCK ); + remRegBins = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ) - remGt2Bins; + goRiceParam = 0; + } + else if( remRegBins >= 3 ) + { + const uint32_t baseLevel = ( remGt2Bins ? 4 : 2 ); + if( goRiceParam < 3 && ((uiLevel-baseLevel)>>1) > (3<<goRiceParam)-1 ) + { + goRiceParam++; + } + if( uiLevel >= 2 && remGt2Bins ) + { + remGt2Bins--; + } + remRegBins -= std::min<int>( uiLevel, 2 ) + (iScanPos != iLastScanPos); + } +#endif } else { diff --git a/source/Lib/CommonLib/QuantRDOQ.h b/source/Lib/CommonLib/QuantRDOQ.h index 71cccd5a597d390f3d49429a0f57eb382da2b1be..e3e242ac66057d7fb63a2bf9e1b3ad950146b639 100644 --- a/source/Lib/CommonLib/QuantRDOQ.h +++ b/source/Lib/CommonLib/QuantRDOQ.h @@ -91,6 +91,11 @@ private: const BinFracBits& fracBitsPar, const BinFracBits& fracBitsGt1, const BinFracBits& fracBitsGt2, +#if JVET_L0274 + const int remGt2Bins, + const int remRegBins, + unsigned goRiceZero, +#endif uint16_t ui16AbsGoRice, int iQBits, double errorScale, @@ -101,6 +106,11 @@ private: const BinFracBits& fracBitsPar, const BinFracBits& fracBitsGt1, const BinFracBits& fracBitsGt2, +#if JVET_L0274 + const int remGt2Bins, + const int remRegBins, + unsigned goRiceZero, +#endif const uint16_t ui16AbsGoRice, 
const bool useLimitedPrefixLength, const int maxLog2TrDynamicRange ) const; diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp index 55061897d875aa7670c99123b2b5bfab943448b3..78239767501b7f9e264dc3b2390ee780b195251c 100644 --- a/source/Lib/CommonLib/Rom.cpp +++ b/source/Lib/CommonLib/Rom.cpp @@ -660,6 +660,18 @@ const uint32_t g_uiMinInGroup[LAST_SIGNIFICANT_GROUPS] = { 0,1,2,3,4,6,8,12,16,2 const uint32_t g_uiGroupIdx[MAX_TU_SIZE] = { 0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9, 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11 ,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12 ,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13 }; +#if JVET_L0274 +const uint32_t g_auiGoRiceParsCoeff[32] = +{ + 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 +}; +const uint32_t g_auiGoRicePosCoeff0[3][32] = +{ + {0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8}, + {1, 1, 1, 1, 2, 3, 4, 4, 4, 6, 6, 6, 8, 8, 8, 8, 8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16}, + {1, 1, 2, 2, 2, 3, 4, 4, 4, 6, 6, 6, 8, 8, 8, 8, 8, 8, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16} +}; +#else const uint32_t g_auiGoRicePars[ 32 ] = { 0, 0, 0, 0, @@ -667,6 +679,7 @@ const uint32_t g_auiGoRicePars[ 32 ] = 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2 }; +#endif const uint32_t g_auiGoRiceRange[MAX_GR_ORDER_RESIDUAL] = { 6, 5, 6, COEF_REMAIN_BIN_REDUCTION, COEF_REMAIN_BIN_REDUCTION, COEF_REMAIN_BIN_REDUCTION, COEF_REMAIN_BIN_REDUCTION, COEF_REMAIN_BIN_REDUCTION, COEF_REMAIN_BIN_REDUCTION, COEF_REMAIN_BIN_REDUCTION diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h index e60c5127eab38cf836243de3a4e66220c365c514..325d7492736af7ef45a5be6694802c085606f5dc 100644 --- 
a/source/Lib/CommonLib/Rom.h +++ b/source/Lib/CommonLib/Rom.h @@ -90,7 +90,12 @@ extern const uint32_t ctxIndMap4x4[4*4]; extern const uint32_t g_uiGroupIdx[ MAX_TU_SIZE ]; extern const uint32_t g_uiMinInGroup[ LAST_SIGNIFICANT_GROUPS ]; +#if JVET_L0274 +extern const uint32_t g_auiGoRiceParsCoeff [ 32 ]; +extern const uint32_t g_auiGoRicePosCoeff0[ 3 ][ 32 ]; +#else extern const uint32_t g_auiGoRicePars [ 32 ]; +#endif extern const uint32_t g_auiGoRiceRange[ MAX_GR_ORDER_RESIDUAL ]; //!< maximum value coded with Rice codes // ==================================================================================================================== diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 5a9597d5807b166a59f4e80d9fbe07ffe04503c2..b2643b958b6766cf35bf710fc7332d3cace3fe76 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -50,6 +50,8 @@ #include <assert.h> #include <cassert> +#define JVET_L0090_PAIR_AVG 1 // Add pairwise average candidates, replace HEVC combined candidates + #define JVET_L0392_ALF_INIT_STATE 1 #define JVET_L0664_ALF_REMOVE_LUMA_5x5 1 @@ -60,6 +62,15 @@ #define JVET_L0194_ONE_CTX_FOR_MRG_IDX 1 // one context for full-block Merge index +#define JVET_L0274 1 +#define JVET_L0274_ENCODER_SPEED_UP ( 1 && JVET_L0274 ) // encoder speed-up by pre-calculating position dependent parameters + + + + + + + @@ -71,6 +82,8 @@ #define REMOVE_MV_ADAPT_PREC 1 // remove the high precision flag in the MV class +#define JVET_L0093_SIMP_PRUNE 1 + #ifndef JVET_B0051_NON_MPM_MODE #define JVET_B0051_NON_MPM_MODE ( 1 && JEM_TOOLS ) #endif @@ -166,7 +179,11 @@ #define HM_EMT_NSST_AS_IN_JEM 1 // #define HM_MDIS_AS_IN_JEM 1 // *** - PM: not filtering ref. 
samples for 64xn case and using Planar MDIS condition at encoder #define HM_JEM_CLIP_PEL 1 // *** +#if JVET_L0093_SIMP_PRUNE +#define HM_JEM_MERGE_CANDS 0 // *** +#else #define HM_JEM_MERGE_CANDS 1 // *** +#endif #endif//JEM_COMP diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index 6d0c6f999c0565746035886fd95d87b84e3aca24..50a027e071af64297cd7c24ed9ea49e4c8c4366e 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -502,11 +502,16 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co const uint32_t maxNumMergeCand = slice.getMaxNumMergeCand(); const bool canFastExit = pu.cs->pps->getLog2ParallelMergeLevelMinus2() == 0; +#if !JVET_L0090_PAIR_AVG + // this variable is unused if remove HEVC combined candidates bool isCandInter[MRG_MAX_NUM_CANDS]; +#endif for (uint32_t ui = 0; ui < maxNumMergeCand; ++ui) { +#if !JVET_L0090_PAIR_AVG isCandInter[ui] = false; +#endif #if JVET_L0646_GBI mrgCtx.GBiIdx[ui] = GBI_DEFAULT; #endif @@ -535,7 +540,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co { miLeft = puLeft->getMotionInfo( posLB.offset(-1, 0) ); +#if !JVET_L0090_PAIR_AVG isCandInter[cnt] = true; +#endif // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miLeft.interDir; @@ -576,7 +583,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co if( !isAvailableA1 || ( miAbove != miLeft ) ) { +#if !JVET_L0090_PAIR_AVG isCandInter[cnt] = true; +#endif // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miAbove.interDir; @@ -621,7 +630,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co if( !isAvailableB1 || ( miAbove != miAboveRight ) ) #endif { +#if !JVET_L0090_PAIR_AVG isCandInter[cnt] = true; +#endif // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miAboveRight.interDir; @@ -665,7 +676,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co if( 
!isAvailableA1 || ( miBelowLeft != miLeft ) ) #endif { +#if !JVET_L0090_PAIR_AVG isCandInter[cnt] = true; +#endif // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miBelowLeft.interDir; @@ -715,7 +728,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co if( isAvailableSubPu ) { +#if !JVET_L0090_PAIR_AVG isCandInter[cnt] = true; +#endif mrgCtx.mrgTypeNeighbours[cnt] = MRG_TYPE_SUBPU_ATMVP; @@ -751,7 +766,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co if( ( !isAvailableA1 || ( miLeft != miAboveLeft ) ) && ( !isAvailableB1 || ( miAbove != miAboveLeft ) ) ) #endif { +#if !JVET_L0090_PAIR_AVG isCandInter[cnt] = true; +#endif // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miAboveLeft.interDir; @@ -885,11 +902,12 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co if( addTMvp ) { mrgCtx.interDirNeighbours[uiArrayAddr] = dir; +#if !JVET_L0090_PAIR_AVG + isCandInter [uiArrayAddr] = true; +#endif #if JVET_L0646_GBI - mrgCtx.GBiIdx [uiArrayAddr] = GBI_DEFAULT; + mrgCtx.GBiIdx[uiArrayAddr] = GBI_DEFAULT; #endif - isCandInter [uiArrayAddr] = true; - if( mrgCandIdx == cnt && canFastExit ) { return; @@ -906,7 +924,97 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co return; } +#if JVET_L0090_PAIR_AVG + // pairwise-average candidates + { + const int cutoff = std::min( cnt, 4 ); + const int end = cutoff * (cutoff - 1) / 2; + constexpr int PRIORITY_LIST0[] = { 0, 0, 1, 0, 1, 2 }; + constexpr int PRIORITY_LIST1[] = { 1, 2, 2, 3, 3, 3 }; + + for( int idx = 0; idx < end && cnt != maxNumMergeCand; idx++ ) + { + const int i = PRIORITY_LIST0[idx]; + const int j = PRIORITY_LIST1[idx]; + + mrgCtx.mvFieldNeighbours[cnt * 2].setMvField( Mv( 0, 0 ), NOT_VALID ); + mrgCtx.mvFieldNeighbours[cnt * 2 + 1].setMvField( Mv( 0, 0 ), NOT_VALID ); + // calculate average MV for L0 and L1 seperately + unsigned char interDir = 0; + for( int refListId = 0; refListId < 
(slice.isInterB() ? 2 : 1); refListId++ ) + { + const short refIdxI = mrgCtx.mvFieldNeighbours[i * 2 + refListId].refIdx; + const short refIdxJ = mrgCtx.mvFieldNeighbours[j * 2 + refListId].refIdx; + + // both MVs are invalid, skip + if( (refIdxI == NOT_VALID) && (refIdxJ == NOT_VALID) ) + { + continue; + } + + interDir += 1 << refListId; + // both MVs are valid, average these two MVs + if( (refIdxI != NOT_VALID) && (refIdxJ != NOT_VALID) ) + { + const Mv& MvI = mrgCtx.mvFieldNeighbours[i * 2 + refListId].mv; + const Mv& MvJ = mrgCtx.mvFieldNeighbours[j * 2 + refListId].mv; + + // average two MVs + Mv avgMv = MvI; +#if !REMOVE_MV_ADAPT_PREC + if( pu.cs->sps->getSpsNext().getUseHighPrecMv() ) + { + avgMv.setHighPrec(); + } +#endif + avgMv += MvJ; + avgMv.setHor( avgMv.getHor() / 2 ); + avgMv.setVer( avgMv.getVer() / 2 ); + + mrgCtx.mvFieldNeighbours[cnt * 2 + refListId].setMvField( avgMv, refIdxI ); + } + // only one MV is valid, take the only one MV + else if( refIdxI != NOT_VALID ) + { + Mv singleMv = mrgCtx.mvFieldNeighbours[i * 2 + refListId].mv; +#if !REMOVE_MV_ADAPT_PREC + if( pu.cs->sps->getSpsNext().getUseHighPrecMv() ) + { + singleMv.setHighPrec(); + } +#endif + mrgCtx.mvFieldNeighbours[cnt * 2 + refListId].setMvField( singleMv, refIdxI ); + } + else if( refIdxJ != NOT_VALID ) + { + Mv singleMv = mrgCtx.mvFieldNeighbours[j * 2 + refListId].mv; +#if !REMOVE_MV_ADAPT_PREC + if( pu.cs->sps->getSpsNext().getUseHighPrecMv() ) + { + singleMv.setHighPrec(); + } +#endif + mrgCtx.mvFieldNeighbours[cnt * 2 + refListId].setMvField( singleMv, refIdxJ ); + } + } + + mrgCtx.interDirNeighbours[cnt] = interDir; + if( interDir > 0 ) + { + cnt++; + } + } + + // early termination + if( cnt == maxNumMergeCand ) + { + return; + } + } +#endif + uint32_t uiArrayAddr = cnt; +#if !JVET_L0090_PAIR_AVG uint32_t uiCutoff = std::min( uiArrayAddr, 4u ); if (slice.isInterB()) @@ -952,6 +1060,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co { return; } 
+#endif int iNumRefIdx = slice.isInterB() ? std::min(slice.getNumRefIdx(REF_PIC_LIST_0), slice.getNumRefIdx(REF_PIC_LIST_1)) : slice.getNumRefIdx(REF_PIC_LIST_0); @@ -959,7 +1068,9 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co int refcnt = 0; while (uiArrayAddr < maxNumMergeCand) { +#if !JVET_L0090_PAIR_AVG isCandInter [uiArrayAddr ] = true; +#endif mrgCtx.interDirNeighbours [uiArrayAddr ] = 1; #if JVET_L0646_GBI mrgCtx.GBiIdx [uiArrayAddr ] = GBI_DEFAULT; diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index f2661b1c1f3adc36c39bb5ddf1e6a5a8694f70b3..1c7fcab067343d766f7ecab778b6910f90945fd2 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -2216,7 +2216,10 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co } uint8_t ctxOffset[16]; +#if JVET_L0274 +#else unsigned nextPass = 0; +#endif //===== decode absolute values ===== const int inferSigPos = nextSigPos != cctx.scanPosLast() ? ( cctx.isNotFirst() ? minSubPos : -1 ) : nextSigPos; @@ -2225,9 +2228,20 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co int lastNZPos = -1; #endif int numNonZero = 0; +#if JVET_L0274 + bool is2x2subblock = ( cctx.log2CGSize() == 2 ); + int remGt2Bins = ( is2x2subblock ? MAX_NUM_GT2_BINS_2x2SUBBLOCK : MAX_NUM_GT2_BINS_4x4SUBBLOCK ); + int remRegBins = ( is2x2subblock ? 
MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ) - remGt2Bins; + int firstPosMode2 = minSubPos - 1; + int firstPosMode1 = minSubPos - 1; +#endif int sigBlkPos[ 1 << MLS_CG_SIZE ]; +#if JVET_L0274 + for( ; nextSigPos >= minSubPos && remRegBins >= 3; nextSigPos-- ) +#else for( ; nextSigPos >= minSubPos; nextSigPos-- ) +#endif { int blkPos = cctx.blockPos( nextSigPos ); unsigned sigFlag = ( !numNonZero && nextSigPos == inferSigPos ); @@ -2237,6 +2251,9 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co const unsigned sigCtxId = cctx.sigCtxIdAbs( nextSigPos, coeff, state ); sigFlag = m_BinDecoder.decodeBin( sigCtxId ); DTRACE( g_trace_ctx, D_SYNTAX_RESI, "sig_bin() bin=%d ctx=%d\n", sigFlag, sigCtxId ); +#if JVET_L0274 + remRegBins--; +#endif } if( sigFlag ) @@ -2249,6 +2266,27 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co lastNZPos = std::max<int>( lastNZPos, nextSigPos ); #endif +#if JVET_L0274 + RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_gt1 ); + unsigned gt1Flag = m_BinDecoder.decodeBin( cctx.greater1CtxIdAbs(ctxOff) ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "gt1_flag() bin=%d ctx=%d\n", gt1Flag, cctx.greater1CtxIdAbs(ctxOff) ); + remRegBins--; + + unsigned parFlag = 0; + if( gt1Flag ) + { + RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_par ); + parFlag = m_BinDecoder.decodeBin( cctx.parityCtxIdAbs( ctxOff ) ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "par_flag() bin=%d ctx=%d\n", parFlag, cctx.parityCtxIdAbs( ctxOff ) ); + + remRegBins--; + if( remGt2Bins && !--remGt2Bins ) + { + firstPosMode1 = nextSigPos - 1; + } + } + coeff[ blkPos ] += 1 + parFlag + gt1Flag; +#else RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_par ); unsigned parFlag = m_BinDecoder.decodeBin( cctx.parityCtxIdAbs(ctxOff) ); DTRACE( g_trace_ctx, D_SYNTAX_RESI, "par_flag() bin=%d ctx=%d\n", parFlag, cctx.parityCtxIdAbs(ctxOff) ); @@ -2258,11 +2296,85 @@ void CABACReader::residual_coding_subblock( 
CoeffCodingContext& cctx, TCoeff* co DTRACE( g_trace_ctx, D_SYNTAX_RESI, "gt1_flag() bin=%d ctx=%d\n", gt1Flag, cctx.greater1CtxIdAbs(ctxOff) ); coeff[blkPos] += 1+parFlag+(gt1Flag<<1); nextPass |= gt1Flag; +#endif } state = ( stateTransTable >> ((state<<2)+((coeff[blkPos]&1)<<1)) ) & 3; } +#if JVET_L0274 + firstPosMode2 = nextSigPos; + firstPosMode1 = ( firstPosMode1 > firstPosMode2 ? firstPosMode1 : firstPosMode2 ); +#endif +#if JVET_L0274 + //===== 2nd PASS: gt2 ===== + for( int scanPos = firstSigPos; scanPos > firstPosMode1; scanPos-- ) + { + TCoeff& tcoeff = coeff[ cctx.blockPos( scanPos ) ]; + if( tcoeff >= 2 ) + { + RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_gt2 ); + uint8_t& ctxOff = ctxOffset[ scanPos - minSubPos ]; + unsigned gt2Flag = m_BinDecoder.decodeBin( cctx.greater2CtxIdAbs(ctxOff) ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "gt2_flag() bin=%d ctx=%d\n", gt2Flag, cctx.greater2CtxIdAbs(ctxOff) ); + tcoeff += (gt2Flag<<1); + } + } + + //===== 3rd PASS: Go-rice codes ===== + unsigned ricePar = 0; + for( int scanPos = firstSigPos; scanPos > firstPosMode1; scanPos-- ) + { + TCoeff& tcoeff = coeff[ cctx.blockPos( scanPos ) ]; + if( tcoeff >= 4 ) + { + RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_escs ); + int rem = m_BinDecoder.decodeRemAbsEP( ricePar, cctx.extPrec(), cctx.maxLog2TrDRange() ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, ricePar ); + tcoeff += (rem<<1); + if( ricePar < 3 && rem > (3<<ricePar)-1 ) + { + ricePar++; + } + } + } + for( int scanPos = firstPosMode1; scanPos > firstPosMode2; scanPos-- ) + { + TCoeff& tcoeff = coeff[ cctx.blockPos( scanPos ) ]; + if( tcoeff >= 2 ) + { + RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_escs ); + int rem = m_BinDecoder.decodeRemAbsEP( ricePar, cctx.extPrec(), cctx.maxLog2TrDRange() ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, ricePar ); + tcoeff += (rem<<1); + if( ricePar < 3 && rem > (3<<ricePar)-1 ) + { + ricePar++; + } + } + } + + //===== 
coeff bypass ==== + for( int scanPos = firstPosMode2; scanPos >= minSubPos; scanPos-- ) + { + int sumAll = cctx.templateAbsSum(scanPos, coeff); + int rice = g_auiGoRiceParsCoeff [sumAll]; + int pos0 = g_auiGoRicePosCoeff0[std::max(0, state - 1)][sumAll]; + int rem = m_BinDecoder.decodeRemAbsEP( rice, cctx.extPrec(), cctx.maxLog2TrDRange() ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, rice ); + TCoeff tcoeff = ( rem == pos0 ? 0 : rem < pos0 ? rem+1 : rem ); + state = ( stateTransTable >> ((state<<2)+((tcoeff&1)<<1)) ) & 3; + if( tcoeff ) + { + int blkPos = cctx.blockPos( scanPos ); + sigBlkPos[ numNonZero++ ] = blkPos; +#if HEVC_USE_SIGN_HIDING + lastNZPos = std::max<int>( lastNZPos, scanPos ); +#endif + coeff[blkPos] = tcoeff; + } + } +#else //===== 2nd PASS: gt2 ===== if( nextPass ) { @@ -2298,6 +2410,7 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co } } } +#endif //===== decode sign's ===== RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__SIGN_BIT, Size( cctx.width(), cctx.height() ), cctx.compID() ); diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index ad0f9ba6249fb22ff39538d84cc05f826f3cad10..6052f00ae25639b7b5793cc66e052bf9edf14f13 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -2110,7 +2110,10 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe } uint8_t ctxOffset[16]; +#if JVET_L0274 +#else unsigned nextPass = 0; +#endif //===== encode absolute values ===== const int inferSigPos = nextSigPos != cctx.scanPosLast() ? ( cctx.isNotFirst() ? minSubPos : -1 ) : nextSigPos; @@ -2121,8 +2124,19 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe int remAbsLevel = -1; int numNonZero = 0; unsigned signPattern = 0; +#if JVET_L0274 + bool is2x2subblock = ( cctx.log2CGSize() == 2 ); + int remGt2Bins = ( is2x2subblock ? 
MAX_NUM_GT2_BINS_2x2SUBBLOCK : MAX_NUM_GT2_BINS_4x4SUBBLOCK ); + int remRegBins = ( is2x2subblock ? MAX_NUM_REG_BINS_2x2SUBBLOCK : MAX_NUM_REG_BINS_4x4SUBBLOCK ) - remGt2Bins; + int firstPosMode2 = minSubPos - 1; + int firstPosMode1 = minSubPos - 1; +#endif +#if JVET_L0274 + for( ; nextSigPos >= minSubPos && remRegBins >= 3; nextSigPos-- ) +#else for( ; nextSigPos >= minSubPos; nextSigPos-- ) +#endif { TCoeff Coeff = coeff[ cctx.blockPos( nextSigPos ) ]; unsigned sigFlag = ( Coeff != 0 ); @@ -2131,6 +2145,9 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe const unsigned sigCtxId = cctx.sigCtxIdAbs( nextSigPos, coeff, state ); m_BinEncoder.encodeBin( sigFlag, sigCtxId ); DTRACE( g_trace_ctx, D_SYNTAX_RESI, "sig_bin() bin=%d ctx=%d\n", sigFlag, sigCtxId ); +#if JVET_L0274 + remRegBins--; +#endif } if( sigFlag ) @@ -2147,6 +2164,26 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe if( nextSigPos != cctx.scanPosLast() ) signPattern <<= 1; if( Coeff < 0 ) signPattern++; +#if JVET_L0274 + unsigned gt1 = !!remAbsLevel; + m_BinEncoder.encodeBin( gt1, cctx.greater1CtxIdAbs(ctxOff) ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "gt1_flag() bin=%d ctx=%d\n", gt1, cctx.greater1CtxIdAbs(ctxOff) ); + remRegBins--; + + if( gt1 ) + { + remAbsLevel -= 1; + m_BinEncoder.encodeBin( remAbsLevel&1, cctx.parityCtxIdAbs( ctxOff ) ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "par_flag() bin=%d ctx=%d\n", remAbsLevel&1, cctx.parityCtxIdAbs( ctxOff ) ); + remAbsLevel >>= 1; + + remRegBins--; + if( remGt2Bins && !--remGt2Bins ) + { + firstPosMode1 = nextSigPos - 1; + } + } +#else m_BinEncoder.encodeBin( remAbsLevel&1, cctx.parityCtxIdAbs(ctxOff) ); DTRACE( g_trace_ctx, D_SYNTAX_RESI, "par_flag() bin=%d ctx=%d\n", remAbsLevel&1, cctx.parityCtxIdAbs(ctxOff) ); remAbsLevel >>= 1; @@ -2155,12 +2192,85 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe m_BinEncoder.encodeBin( gt1, 
cctx.greater1CtxIdAbs(ctxOff) ); DTRACE( g_trace_ctx, D_SYNTAX_RESI, "gt1_flag() bin=%d ctx=%d\n", gt1, cctx.greater1CtxIdAbs(ctxOff) ); nextPass |= gt1; +#endif } state = ( stateTransTable >> ((state<<2)+((Coeff&1)<<1)) ) & 3; } +#if JVET_L0274 + firstPosMode2 = nextSigPos; + firstPosMode1 = ( firstPosMode1 > firstPosMode2 ? firstPosMode1 : firstPosMode2 ); +#endif +#if JVET_L0274 + //===== 2nd PASS: gt2 ===== + for( int scanPos = firstSigPos; scanPos > firstPosMode1; scanPos-- ) + { + unsigned absLevel = abs( coeff[ cctx.blockPos( scanPos ) ] ); + if( absLevel >= 2 ) + { + uint8_t& ctxOff = ctxOffset[ scanPos - minSubPos ]; + unsigned gt2 = ( absLevel >= 4 ); + m_BinEncoder.encodeBin( gt2, cctx.greater2CtxIdAbs(ctxOff) ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "gt2_flag() bin=%d ctx=%d\n", gt2, cctx.greater2CtxIdAbs(ctxOff) ); + } + } + + //===== 3rd PASS: Go-rice codes ===== + unsigned ricePar = 0; + for( int scanPos = firstSigPos; scanPos > firstPosMode1; scanPos-- ) + { + unsigned absLevel = abs( coeff[ cctx.blockPos( scanPos ) ] ); + if( absLevel >= 4 ) + { + unsigned rem = ( absLevel - 4 ) >> 1; + m_BinEncoder.encodeRemAbsEP( rem, ricePar, cctx.extPrec(), cctx.maxLog2TrDRange() ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, ricePar ); + if( ricePar < 3 && rem > (3<<ricePar)-1 ) + { + ricePar++; + } + } + } + for( int scanPos = firstPosMode1; scanPos > firstPosMode2; scanPos-- ) + { + unsigned absLevel = abs( coeff[ cctx.blockPos( scanPos ) ] ); + if( absLevel >= 2 ) + { + unsigned rem = ( absLevel - 2 ) >> 1; + m_BinEncoder.encodeRemAbsEP( rem, ricePar, cctx.extPrec(), cctx.maxLog2TrDRange() ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, ricePar ); + if( ricePar < 3 && rem > (3<<ricePar)-1 ) + { + ricePar++; + } + } + } + + //===== coeff bypass ==== + for( int scanPos = firstPosMode2; scanPos >= minSubPos; scanPos-- ) + { + TCoeff Coeff = coeff[ cctx.blockPos( scanPos ) ]; + unsigned absLevel = abs( 
Coeff ); + int sumAll = cctx.templateAbsSum(scanPos, coeff); + int rice = g_auiGoRiceParsCoeff [sumAll]; + int pos0 = g_auiGoRicePosCoeff0[std::max(0, state - 1)][sumAll]; + unsigned rem = ( absLevel == 0 ? pos0 : absLevel <= pos0 ? absLevel-1 : absLevel ); + m_BinEncoder.encodeRemAbsEP( rem, rice, cctx.extPrec(), cctx.maxLog2TrDRange() ); + DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, rice ); + state = ( stateTransTable >> ((state<<2)+((absLevel&1)<<1)) ) & 3; + if( absLevel ) + { + numNonZero++; +#if HEVC_USE_SIGN_HIDING + lastNZPos = std::max<int>( lastNZPos, scanPos ); +#endif + signPattern <<= 1; + if( Coeff < 0 ) signPattern++; + } + } +#else //===== 2nd PASS: gt2 ===== if( nextPass ) { @@ -2194,6 +2304,7 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe } } } +#endif //===== encode sign's ===== #if HEVC_USE_SIGN_HIDING