/* The copyright in this software is being made available under the BSD * License, included below. This software may be subject to other third party * and contributor rights, including patent rights, and no such rights are * granted under this license. * * Copyright (c) 2010-2019, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may * be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /** \file RdCost.cpp \brief RD cost computation class */ #define DONT_UNDEF_SIZE_AWARE_PER_EL_OP #include "RdCost.h" #include "Rom.h" #include "UnitPartitioner.h" #include <limits> //! \ingroup CommonLib //! 
\{ FpDistFunc RdCost::m_afpDistortFunc[DF_TOTAL_FUNCTIONS] = { nullptr, }; RdCost::RdCost() { init(); } RdCost::~RdCost() { } #if WCG_EXT double RdCost::calcRdCost( uint64_t fracBits, Distortion distortion, bool useUnadjustedLambda ) #else double RdCost::calcRdCost( uint64_t fracBits, Distortion distortion ) #endif { #if WCG_EXT return ( useUnadjustedLambda ? m_DistScaleUnadjusted : m_DistScale ) * double( distortion ) + double( fracBits ); #else return m_DistScale * double( distortion ) + double( fracBits ); #endif } void RdCost::setLambda( double dLambda, const BitDepths &bitDepths ) { m_dLambda = dLambda; m_DistScale = double(1<<SCALE_BITS) / m_dLambda; m_dLambdaMotionSAD[0] = sqrt(m_dLambda); dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 + 6 * ((bitDepths.recon[CHANNEL_TYPE_LUMA] - 8) - DISTORTION_PRECISION_ADJUSTMENT(bitDepths.recon[CHANNEL_TYPE_LUMA]))) / 3.0)); m_dLambdaMotionSAD[1] = sqrt(dLambda); } // Initialize Function Pointer by [eDFunc] void RdCost::init() { m_afpDistortFunc[DF_SSE ] = RdCost::xGetSSE; m_afpDistortFunc[DF_SSE2 ] = RdCost::xGetSSE; m_afpDistortFunc[DF_SSE4 ] = RdCost::xGetSSE4; m_afpDistortFunc[DF_SSE8 ] = RdCost::xGetSSE8; m_afpDistortFunc[DF_SSE16 ] = RdCost::xGetSSE16; m_afpDistortFunc[DF_SSE32 ] = RdCost::xGetSSE32; m_afpDistortFunc[DF_SSE64 ] = RdCost::xGetSSE64; m_afpDistortFunc[DF_SSE16N ] = RdCost::xGetSSE16N; m_afpDistortFunc[DF_SAD ] = RdCost::xGetSAD; m_afpDistortFunc[DF_SAD2 ] = RdCost::xGetSAD; m_afpDistortFunc[DF_SAD4 ] = RdCost::xGetSAD4; m_afpDistortFunc[DF_SAD8 ] = RdCost::xGetSAD8; m_afpDistortFunc[DF_SAD16 ] = RdCost::xGetSAD16; m_afpDistortFunc[DF_SAD32 ] = RdCost::xGetSAD32; m_afpDistortFunc[DF_SAD64 ] = RdCost::xGetSAD64; m_afpDistortFunc[DF_SAD16N ] = RdCost::xGetSAD16N; m_afpDistortFunc[DF_SAD12 ] = RdCost::xGetSAD12; m_afpDistortFunc[DF_SAD24 ] = RdCost::xGetSAD24; m_afpDistortFunc[DF_SAD48 ] = RdCost::xGetSAD48; m_afpDistortFunc[DF_HAD ] = RdCost::xGetHADs; 
m_afpDistortFunc[DF_HAD2 ] = RdCost::xGetHADs; m_afpDistortFunc[DF_HAD4 ] = RdCost::xGetHADs; m_afpDistortFunc[DF_HAD8 ] = RdCost::xGetHADs; m_afpDistortFunc[DF_HAD16 ] = RdCost::xGetHADs; m_afpDistortFunc[DF_HAD32 ] = RdCost::xGetHADs; m_afpDistortFunc[DF_HAD64 ] = RdCost::xGetHADs; m_afpDistortFunc[DF_HAD16N ] = RdCost::xGetHADs; m_afpDistortFunc[DF_MRSAD ] = RdCost::xGetMRSAD; m_afpDistortFunc[DF_MRSAD2 ] = RdCost::xGetMRSAD; m_afpDistortFunc[DF_MRSAD4 ] = RdCost::xGetMRSAD4; m_afpDistortFunc[DF_MRSAD8 ] = RdCost::xGetMRSAD8; m_afpDistortFunc[DF_MRSAD16 ] = RdCost::xGetMRSAD16; m_afpDistortFunc[DF_MRSAD32 ] = RdCost::xGetMRSAD32; m_afpDistortFunc[DF_MRSAD64 ] = RdCost::xGetMRSAD64; m_afpDistortFunc[DF_MRSAD16N ] = RdCost::xGetMRSAD16N; m_afpDistortFunc[DF_MRSAD12 ] = RdCost::xGetMRSAD12; m_afpDistortFunc[DF_MRSAD24 ] = RdCost::xGetMRSAD24; m_afpDistortFunc[DF_MRSAD48 ] = RdCost::xGetMRSAD48; m_afpDistortFunc[DF_MRHAD ] = RdCost::xGetMRHADs; m_afpDistortFunc[DF_MRHAD2 ] = RdCost::xGetMRHADs; m_afpDistortFunc[DF_MRHAD4 ] = RdCost::xGetMRHADs; m_afpDistortFunc[DF_MRHAD8 ] = RdCost::xGetMRHADs; m_afpDistortFunc[DF_MRHAD16 ] = RdCost::xGetMRHADs; m_afpDistortFunc[DF_MRHAD32 ] = RdCost::xGetMRHADs; m_afpDistortFunc[DF_MRHAD64 ] = RdCost::xGetMRHADs; m_afpDistortFunc[DF_MRHAD16N ] = RdCost::xGetMRHADs; m_afpDistortFunc[DF_SAD_FULL_NBIT ] = RdCost::xGetSAD_full; m_afpDistortFunc[DF_SAD_FULL_NBIT2 ] = RdCost::xGetSAD_full; m_afpDistortFunc[DF_SAD_FULL_NBIT4 ] = RdCost::xGetSAD_full; m_afpDistortFunc[DF_SAD_FULL_NBIT8 ] = RdCost::xGetSAD_full; m_afpDistortFunc[DF_SAD_FULL_NBIT16 ] = RdCost::xGetSAD_full; m_afpDistortFunc[DF_SAD_FULL_NBIT32 ] = RdCost::xGetSAD_full; m_afpDistortFunc[DF_SAD_FULL_NBIT64 ] = RdCost::xGetSAD_full; m_afpDistortFunc[DF_SAD_FULL_NBIT16N] = RdCost::xGetSAD_full; #if WCG_EXT m_afpDistortFunc[DF_SSE_WTD ] = RdCost::xGetSSE_WTD; m_afpDistortFunc[DF_SSE2_WTD ] = RdCost::xGetSSE2_WTD; m_afpDistortFunc[DF_SSE4_WTD ] = RdCost::xGetSSE4_WTD; 
m_afpDistortFunc[DF_SSE8_WTD ] = RdCost::xGetSSE8_WTD; m_afpDistortFunc[DF_SSE16_WTD ] = RdCost::xGetSSE16_WTD; m_afpDistortFunc[DF_SSE32_WTD ] = RdCost::xGetSSE32_WTD; m_afpDistortFunc[DF_SSE64_WTD ] = RdCost::xGetSSE64_WTD; m_afpDistortFunc[DF_SSE16N_WTD] = RdCost::xGetSSE16N_WTD; #endif m_afpDistortFunc[DF_SAD_INTERMEDIATE_BITDEPTH] = RdCost::xGetSAD; #if ENABLE_SIMD_OPT_DIST #ifdef TARGET_SIMD_X86 initRdCostX86(); #endif #endif m_costMode = COST_STANDARD_LOSSY; m_motionLambda = 0; m_iCostScale = 0; } #if ENABLE_SPLIT_PARALLELISM void RdCost::copyState( const RdCost& other ) { m_costMode = other.m_costMode; m_dLambda = other.m_dLambda; m_DistScale = other.m_DistScale; memcpy( m_distortionWeight, other.m_distortionWeight, sizeof( m_distortionWeight ) ); m_mvPredictor = other.m_mvPredictor; m_motionLambda = other.m_motionLambda; m_iCostScale = other.m_iCostScale; memcpy( m_dLambdaMotionSAD, other.m_dLambdaMotionSAD, sizeof( m_dLambdaMotionSAD ) ); #if WCG_EXT m_dLambda_unadjusted = other.m_dLambda_unadjusted ; m_DistScaleUnadjusted = other.m_DistScaleUnadjusted; #endif } #endif void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const Pel* piRefY, int iRefStride, int bitDepth, ComponentID compID, int subShiftMode, int step, bool useHadamard ) { rcDP.bitDepth = bitDepth; rcDP.compID = compID; // set Original & Curr Pointer / Stride rcDP.org = org; rcDP.cur.buf = piRefY; rcDP.cur.stride = iRefStride; // set Block Width / Height rcDP.cur.width = org.width; rcDP.cur.height = org.height; rcDP.step = step; rcDP.maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max(); int DFOffset = ( rcDP.useMR ? 
DF_MRSAD - DF_SAD : 0 ); if( !useHadamard ) { if( org.width == 12 ) { rcDP.distFunc = m_afpDistortFunc[ DF_SAD12 + DFOffset ]; } else if( org.width == 24 ) { rcDP.distFunc = m_afpDistortFunc[ DF_SAD24 + DFOffset ]; } else if( org.width == 48 ) { rcDP.distFunc = m_afpDistortFunc[ DF_SAD48 + DFOffset ]; } else if( isPowerOf2( org.width ) ) { rcDP.distFunc = m_afpDistortFunc[ DF_SAD + DFOffset + g_aucLog2[ org.width ] ]; } else { rcDP.distFunc = m_afpDistortFunc[ DF_SAD + DFOffset ]; } } else if( isPowerOf2( org.width ) ) { rcDP.distFunc = m_afpDistortFunc[ DF_HAD + DFOffset + g_aucLog2[ org.width ] ]; } else { rcDP.distFunc = m_afpDistortFunc[ DF_HAD + DFOffset ]; } // initialize rcDP.subShift = 0; if( subShiftMode == 1 ) { if( rcDP.org.height > 32 && ( rcDP.org.height & 15 ) == 0 ) { rcDP.subShift = 4; } else if( rcDP.org.height > 16 && ( rcDP.org.height & 7 ) == 0 ) { rcDP.subShift = 3; } else if( rcDP.org.height > 8 && ( rcDP.org.height & 3 ) == 0 ) { rcDP.subShift = 2; } else if( ( rcDP.org.height & 1 ) == 0 ) { rcDP.subShift = 1; } } else if( subShiftMode == 2 ) { if( rcDP.org.height > 8 && rcDP.org.width <= 64 ) { rcDP.subShift = 1; } } } void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &cur, int bitDepth, ComponentID compID, bool useHadamard ) { rcDP.org = org; rcDP.cur = cur; rcDP.step = 1; rcDP.subShift = 0; rcDP.bitDepth = bitDepth; rcDP.compID = compID; const int DFOffset = ( rcDP.useMR ? 
DF_MRSAD - DF_SAD : 0 ); if( !useHadamard ) { if( org.width == 12 ) { rcDP.distFunc = m_afpDistortFunc[ DF_SAD12 + DFOffset ]; } else if( org.width == 24 ) { rcDP.distFunc = m_afpDistortFunc[ DF_SAD24 + DFOffset ]; } else if( org.width == 48 ) { rcDP.distFunc = m_afpDistortFunc[ DF_SAD48 + DFOffset ]; } else if( isPowerOf2( org.width) ) { rcDP.distFunc = m_afpDistortFunc[ DF_SAD + DFOffset + g_aucLog2[ org.width ] ]; } else { rcDP.distFunc = m_afpDistortFunc[ DF_SAD + DFOffset ]; } } else { rcDP.distFunc = m_afpDistortFunc[ DF_HAD + DFOffset + g_aucLog2[ org.width ] ]; } rcDP.maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max(); } void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode, int step, bool useHadamard, bool bioApplied ) { rcDP.bitDepth = bitDepth; rcDP.compID = compID; rcDP.org.buf = pOrg; rcDP.org.stride = iOrgStride; rcDP.org.width = width; rcDP.org.height = height; rcDP.cur.buf = piRefY; rcDP.cur.stride = iRefStride; rcDP.cur.width = width; rcDP.cur.height = height; rcDP.subShift = subShiftMode; rcDP.step = step; rcDP.maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max(); CHECK( useHadamard || rcDP.useMR, "only used in xDMVRCost with these default parameters (so far...)" ); if ( bioApplied ) { rcDP.distFunc = m_afpDistortFunc[ DF_SAD_INTERMEDIATE_BITDEPTH ]; return; } if( width == 12 ) { rcDP.distFunc = m_afpDistortFunc[ DF_SAD12 ]; } else if( width == 24 ) { rcDP.distFunc = m_afpDistortFunc[ DF_SAD24 ]; } else if( width == 48 ) { rcDP.distFunc = m_afpDistortFunc[ DF_SAD48 ]; } else { rcDP.distFunc = m_afpDistortFunc[ DF_SAD + g_aucLog2[ width ] ]; } } #if WCG_EXT Distortion RdCost::getDistPart( const CPelBuf &org, const CPelBuf &cur, int bitDepth, const ComponentID compID, DFunc eDFunc, const CPelBuf *orgLuma ) #else Distortion RdCost::getDistPart( const CPelBuf &org, const CPelBuf &cur, 
int bitDepth, const ComponentID compID, DFunc eDFunc ) #endif { DistParam cDtParam; cDtParam.org = org; cDtParam.cur = cur; cDtParam.step = 1; cDtParam.bitDepth = bitDepth; cDtParam.compID = compID; #if WCG_EXT if( orgLuma ) { if( isChroma(compID) ) { cDtParam.orgLuma = *orgLuma; } else { cDtParam.orgLuma = org; } } #endif if( isPowerOf2( org.width ) ) { cDtParam.distFunc = m_afpDistortFunc[eDFunc + g_aucLog2[org.width]]; } else { cDtParam.distFunc = m_afpDistortFunc[eDFunc]; } if (isChroma(compID)) { return ((Distortion) (m_distortionWeight[ MAP_CHROMA(compID) ] * cDtParam.distFunc( cDtParam ))); } else { return cDtParam.distFunc( cDtParam ); } } // ==================================================================================================================== // Distortion functions // ==================================================================================================================== // -------------------------------------------------------------------------------------------------------------------- // SAD // -------------------------------------------------------------------------------------------------------------------- Distortion RdCost::xGetSAD_full( const DistParam& rcDtParam ) { CHECK( rcDtParam.applyWeight, "Cannot apply weight when using full-bit SAD!" 
); const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int height = rcDtParam.org.height; int width = rcDtParam.org.width; int iSubShift = rcDtParam.subShift; int iSubStep = ( 1 << iSubShift ); int iStrideCur = rcDtParam.cur.stride * iSubStep; int iStrideOrg = rcDtParam.org.stride * iSubStep; Distortion uiSum = 0; #define SAD_OP( ADDR ) uiSum += abs( piOrg[ADDR] - piCur[ADDR] ); #define SAD_INC piOrg += iStrideOrg; piCur += iStrideCur; SIZE_AWARE_PER_EL_OP( SAD_OP, SAD_INC ) #undef SAD_OP #undef SAD_INC uiSum <<= iSubShift; return uiSum; } Distortion RdCost::xGetSAD( const DistParam& rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const int iCols = rcDtParam.org.width; int iRows = rcDtParam.org.height; const int iSubShift = rcDtParam.subShift; const int iSubStep = ( 1 << iSubShift ); const int iStrideCur = rcDtParam.cur.stride * iSubStep; const int iStrideOrg = rcDtParam.org.stride * iSubStep; const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth); Distortion uiSum = 0; for( ; iRows != 0; iRows -= iSubStep ) { for (int n = 0; n < iCols; n++ ) { uiSum += abs( piOrg[n] - piCur[n] ); } if (rcDtParam.maximumDistortionForEarlyExit < ( uiSum >> distortionShift )) { return ( uiSum >> distortionShift ); } piOrg += iStrideOrg; piCur += iStrideCur; } uiSum <<= iSubShift; return ( uiSum >> distortionShift ); } Distortion RdCost::xGetSAD4( const DistParam& rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iSubShift = rcDtParam.subShift; int iSubStep = ( 1 << iSubShift ); int iStrideCur = rcDtParam.cur.stride * iSubStep; int iStrideOrg = rcDtParam.org.stride * iSubStep; Distortion uiSum = 0; for( ; iRows != 0; iRows -= iSubStep ) { 
uiSum += abs( piOrg[0] - piCur[0] ); uiSum += abs( piOrg[1] - piCur[1] ); uiSum += abs( piOrg[2] - piCur[2] ); uiSum += abs( piOrg[3] - piCur[3] ); piOrg += iStrideOrg; piCur += iStrideCur; } uiSum <<= iSubShift; return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD8( const DistParam& rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iSubShift = rcDtParam.subShift; int iSubStep = ( 1 << iSubShift ); int iStrideCur = rcDtParam.cur.stride * iSubStep; int iStrideOrg = rcDtParam.org.stride * iSubStep; Distortion uiSum = 0; for( ; iRows != 0; iRows-=iSubStep ) { uiSum += abs( piOrg[0] - piCur[0] ); uiSum += abs( piOrg[1] - piCur[1] ); uiSum += abs( piOrg[2] - piCur[2] ); uiSum += abs( piOrg[3] - piCur[3] ); uiSum += abs( piOrg[4] - piCur[4] ); uiSum += abs( piOrg[5] - piCur[5] ); uiSum += abs( piOrg[6] - piCur[6] ); uiSum += abs( piOrg[7] - piCur[7] ); piOrg += iStrideOrg; piCur += iStrideCur; } uiSum <<= iSubShift; return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD16( const DistParam& rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iSubShift = rcDtParam.subShift; int iSubStep = ( 1 << iSubShift ); int iStrideCur = rcDtParam.cur.stride * iSubStep; int iStrideOrg = rcDtParam.org.stride * iSubStep; Distortion uiSum = 0; for( ; iRows != 0; iRows -= iSubStep ) { uiSum += abs( piOrg[0] - piCur[0] ); uiSum += abs( piOrg[1] - piCur[1] ); uiSum += abs( piOrg[2] - piCur[2] ); uiSum += abs( piOrg[3] - piCur[3] ); uiSum += abs( piOrg[4] - piCur[4] ); uiSum += abs( piOrg[5] - piCur[5] ); uiSum += abs( piOrg[6] - piCur[6] ); uiSum += abs( piOrg[7] - piCur[7] ); 
uiSum += abs( piOrg[8] - piCur[8] ); uiSum += abs( piOrg[9] - piCur[9] ); uiSum += abs( piOrg[10] - piCur[10] ); uiSum += abs( piOrg[11] - piCur[11] ); uiSum += abs( piOrg[12] - piCur[12] ); uiSum += abs( piOrg[13] - piCur[13] ); uiSum += abs( piOrg[14] - piCur[14] ); uiSum += abs( piOrg[15] - piCur[15] ); piOrg += iStrideOrg; piCur += iStrideCur; } uiSum <<= iSubShift; return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD12( const DistParam& rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iSubShift = rcDtParam.subShift; int iSubStep = ( 1 << iSubShift ); int iStrideCur = rcDtParam.cur.stride * iSubStep; int iStrideOrg = rcDtParam.org.stride * iSubStep; Distortion uiSum = 0; for( ; iRows != 0; iRows-=iSubStep ) { uiSum += abs( piOrg[0] - piCur[0] ); uiSum += abs( piOrg[1] - piCur[1] ); uiSum += abs( piOrg[2] - piCur[2] ); uiSum += abs( piOrg[3] - piCur[3] ); uiSum += abs( piOrg[4] - piCur[4] ); uiSum += abs( piOrg[5] - piCur[5] ); uiSum += abs( piOrg[6] - piCur[6] ); uiSum += abs( piOrg[7] - piCur[7] ); uiSum += abs( piOrg[8] - piCur[8] ); uiSum += abs( piOrg[9] - piCur[9] ); uiSum += abs( piOrg[10] - piCur[10] ); uiSum += abs( piOrg[11] - piCur[11] ); piOrg += iStrideOrg; piCur += iStrideCur; } uiSum <<= iSubShift; return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD16N( const DistParam &rcDtParam ) { const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iCols = rcDtParam.org.width; int iSubShift = rcDtParam.subShift; int iSubStep = ( 1 << iSubShift ); int iStrideCur = rcDtParam.cur.stride * iSubStep; int iStrideOrg = rcDtParam.org.stride * iSubStep; Distortion uiSum = 0; for( ; iRows != 0; iRows-=iSubStep ) { for (int n = 0; n < iCols; n+=16 ) { 
uiSum += abs( piOrg[n+ 0] - piCur[n+ 0] ); uiSum += abs( piOrg[n+ 1] - piCur[n+ 1] ); uiSum += abs( piOrg[n+ 2] - piCur[n+ 2] ); uiSum += abs( piOrg[n+ 3] - piCur[n+ 3] ); uiSum += abs( piOrg[n+ 4] - piCur[n+ 4] ); uiSum += abs( piOrg[n+ 5] - piCur[n+ 5] ); uiSum += abs( piOrg[n+ 6] - piCur[n+ 6] ); uiSum += abs( piOrg[n+ 7] - piCur[n+ 7] ); uiSum += abs( piOrg[n+ 8] - piCur[n+ 8] ); uiSum += abs( piOrg[n+ 9] - piCur[n+ 9] ); uiSum += abs( piOrg[n+10] - piCur[n+10] ); uiSum += abs( piOrg[n+11] - piCur[n+11] ); uiSum += abs( piOrg[n+12] - piCur[n+12] ); uiSum += abs( piOrg[n+13] - piCur[n+13] ); uiSum += abs( piOrg[n+14] - piCur[n+14] ); uiSum += abs( piOrg[n+15] - piCur[n+15] ); } piOrg += iStrideOrg; piCur += iStrideCur; } uiSum <<= iSubShift; return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD32( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iSubShift = rcDtParam.subShift; int iSubStep = ( 1 << iSubShift ); int iStrideCur = rcDtParam.cur.stride * iSubStep; int iStrideOrg = rcDtParam.org.stride * iSubStep; Distortion uiSum = 0; for( ; iRows != 0; iRows-=iSubStep ) { uiSum += abs( piOrg[0] - piCur[0] ); uiSum += abs( piOrg[1] - piCur[1] ); uiSum += abs( piOrg[2] - piCur[2] ); uiSum += abs( piOrg[3] - piCur[3] ); uiSum += abs( piOrg[4] - piCur[4] ); uiSum += abs( piOrg[5] - piCur[5] ); uiSum += abs( piOrg[6] - piCur[6] ); uiSum += abs( piOrg[7] - piCur[7] ); uiSum += abs( piOrg[8] - piCur[8] ); uiSum += abs( piOrg[9] - piCur[9] ); uiSum += abs( piOrg[10] - piCur[10] ); uiSum += abs( piOrg[11] - piCur[11] ); uiSum += abs( piOrg[12] - piCur[12] ); uiSum += abs( piOrg[13] - piCur[13] ); uiSum += abs( piOrg[14] - piCur[14] ); uiSum += abs( piOrg[15] - piCur[15] ); uiSum += abs( piOrg[16] - piCur[16] ); uiSum += abs( piOrg[17] - piCur[17] ); 
uiSum += abs( piOrg[18] - piCur[18] ); uiSum += abs( piOrg[19] - piCur[19] ); uiSum += abs( piOrg[20] - piCur[20] ); uiSum += abs( piOrg[21] - piCur[21] ); uiSum += abs( piOrg[22] - piCur[22] ); uiSum += abs( piOrg[23] - piCur[23] ); uiSum += abs( piOrg[24] - piCur[24] ); uiSum += abs( piOrg[25] - piCur[25] ); uiSum += abs( piOrg[26] - piCur[26] ); uiSum += abs( piOrg[27] - piCur[27] ); uiSum += abs( piOrg[28] - piCur[28] ); uiSum += abs( piOrg[29] - piCur[29] ); uiSum += abs( piOrg[30] - piCur[30] ); uiSum += abs( piOrg[31] - piCur[31] ); piOrg += iStrideOrg; piCur += iStrideCur; } uiSum <<= iSubShift; return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD24( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iSubShift = rcDtParam.subShift; int iSubStep = ( 1 << iSubShift ); int iStrideCur = rcDtParam.cur.stride * iSubStep; int iStrideOrg = rcDtParam.org.stride * iSubStep; Distortion uiSum = 0; for( ; iRows != 0; iRows-=iSubStep ) { uiSum += abs( piOrg[0] - piCur[0] ); uiSum += abs( piOrg[1] - piCur[1] ); uiSum += abs( piOrg[2] - piCur[2] ); uiSum += abs( piOrg[3] - piCur[3] ); uiSum += abs( piOrg[4] - piCur[4] ); uiSum += abs( piOrg[5] - piCur[5] ); uiSum += abs( piOrg[6] - piCur[6] ); uiSum += abs( piOrg[7] - piCur[7] ); uiSum += abs( piOrg[8] - piCur[8] ); uiSum += abs( piOrg[9] - piCur[9] ); uiSum += abs( piOrg[10] - piCur[10] ); uiSum += abs( piOrg[11] - piCur[11] ); uiSum += abs( piOrg[12] - piCur[12] ); uiSum += abs( piOrg[13] - piCur[13] ); uiSum += abs( piOrg[14] - piCur[14] ); uiSum += abs( piOrg[15] - piCur[15] ); uiSum += abs( piOrg[16] - piCur[16] ); uiSum += abs( piOrg[17] - piCur[17] ); uiSum += abs( piOrg[18] - piCur[18] ); uiSum += abs( piOrg[19] - piCur[19] ); uiSum += abs( piOrg[20] - piCur[20] ); uiSum += abs( piOrg[21] - 
piCur[21] ); uiSum += abs( piOrg[22] - piCur[22] ); uiSum += abs( piOrg[23] - piCur[23] ); piOrg += iStrideOrg; piCur += iStrideCur; } uiSum <<= iSubShift; return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD64( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iSubShift = rcDtParam.subShift; int iSubStep = ( 1 << iSubShift ); int iStrideCur = rcDtParam.cur.stride * iSubStep; int iStrideOrg = rcDtParam.org.stride * iSubStep; Distortion uiSum = 0; for( ; iRows != 0; iRows-=iSubStep ) { uiSum += abs( piOrg[0] - piCur[0] ); uiSum += abs( piOrg[1] - piCur[1] ); uiSum += abs( piOrg[2] - piCur[2] ); uiSum += abs( piOrg[3] - piCur[3] ); uiSum += abs( piOrg[4] - piCur[4] ); uiSum += abs( piOrg[5] - piCur[5] ); uiSum += abs( piOrg[6] - piCur[6] ); uiSum += abs( piOrg[7] - piCur[7] ); uiSum += abs( piOrg[8] - piCur[8] ); uiSum += abs( piOrg[9] - piCur[9] ); uiSum += abs( piOrg[10] - piCur[10] ); uiSum += abs( piOrg[11] - piCur[11] ); uiSum += abs( piOrg[12] - piCur[12] ); uiSum += abs( piOrg[13] - piCur[13] ); uiSum += abs( piOrg[14] - piCur[14] ); uiSum += abs( piOrg[15] - piCur[15] ); uiSum += abs( piOrg[16] - piCur[16] ); uiSum += abs( piOrg[17] - piCur[17] ); uiSum += abs( piOrg[18] - piCur[18] ); uiSum += abs( piOrg[19] - piCur[19] ); uiSum += abs( piOrg[20] - piCur[20] ); uiSum += abs( piOrg[21] - piCur[21] ); uiSum += abs( piOrg[22] - piCur[22] ); uiSum += abs( piOrg[23] - piCur[23] ); uiSum += abs( piOrg[24] - piCur[24] ); uiSum += abs( piOrg[25] - piCur[25] ); uiSum += abs( piOrg[26] - piCur[26] ); uiSum += abs( piOrg[27] - piCur[27] ); uiSum += abs( piOrg[28] - piCur[28] ); uiSum += abs( piOrg[29] - piCur[29] ); uiSum += abs( piOrg[30] - piCur[30] ); uiSum += abs( piOrg[31] - piCur[31] ); uiSum += abs( piOrg[32] - piCur[32] ); uiSum += abs( 
piOrg[33] - piCur[33] ); uiSum += abs( piOrg[34] - piCur[34] ); uiSum += abs( piOrg[35] - piCur[35] ); uiSum += abs( piOrg[36] - piCur[36] ); uiSum += abs( piOrg[37] - piCur[37] ); uiSum += abs( piOrg[38] - piCur[38] ); uiSum += abs( piOrg[39] - piCur[39] ); uiSum += abs( piOrg[40] - piCur[40] ); uiSum += abs( piOrg[41] - piCur[41] ); uiSum += abs( piOrg[42] - piCur[42] ); uiSum += abs( piOrg[43] - piCur[43] ); uiSum += abs( piOrg[44] - piCur[44] ); uiSum += abs( piOrg[45] - piCur[45] ); uiSum += abs( piOrg[46] - piCur[46] ); uiSum += abs( piOrg[47] - piCur[47] ); uiSum += abs( piOrg[48] - piCur[48] ); uiSum += abs( piOrg[49] - piCur[49] ); uiSum += abs( piOrg[50] - piCur[50] ); uiSum += abs( piOrg[51] - piCur[51] ); uiSum += abs( piOrg[52] - piCur[52] ); uiSum += abs( piOrg[53] - piCur[53] ); uiSum += abs( piOrg[54] - piCur[54] ); uiSum += abs( piOrg[55] - piCur[55] ); uiSum += abs( piOrg[56] - piCur[56] ); uiSum += abs( piOrg[57] - piCur[57] ); uiSum += abs( piOrg[58] - piCur[58] ); uiSum += abs( piOrg[59] - piCur[59] ); uiSum += abs( piOrg[60] - piCur[60] ); uiSum += abs( piOrg[61] - piCur[61] ); uiSum += abs( piOrg[62] - piCur[62] ); uiSum += abs( piOrg[63] - piCur[63] ); piOrg += iStrideOrg; piCur += iStrideCur; } uiSum <<= iSubShift; return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD48( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iSubShift = rcDtParam.subShift; int iSubStep = ( 1 << iSubShift ); int iStrideCur = rcDtParam.cur.stride * iSubStep; int iStrideOrg = rcDtParam.org.stride * iSubStep; Distortion uiSum = 0; for( ; iRows != 0; iRows-=iSubStep ) { uiSum += abs( piOrg[0] - piCur[0] ); uiSum += abs( piOrg[1] - piCur[1] ); uiSum += abs( piOrg[2] - piCur[2] ); uiSum += abs( piOrg[3] - piCur[3] ); uiSum += abs( piOrg[4] - 
piCur[4] ); uiSum += abs( piOrg[5] - piCur[5] ); uiSum += abs( piOrg[6] - piCur[6] ); uiSum += abs( piOrg[7] - piCur[7] ); uiSum += abs( piOrg[8] - piCur[8] ); uiSum += abs( piOrg[9] - piCur[9] ); uiSum += abs( piOrg[10] - piCur[10] ); uiSum += abs( piOrg[11] - piCur[11] ); uiSum += abs( piOrg[12] - piCur[12] ); uiSum += abs( piOrg[13] - piCur[13] ); uiSum += abs( piOrg[14] - piCur[14] ); uiSum += abs( piOrg[15] - piCur[15] ); uiSum += abs( piOrg[16] - piCur[16] ); uiSum += abs( piOrg[17] - piCur[17] ); uiSum += abs( piOrg[18] - piCur[18] ); uiSum += abs( piOrg[19] - piCur[19] ); uiSum += abs( piOrg[20] - piCur[20] ); uiSum += abs( piOrg[21] - piCur[21] ); uiSum += abs( piOrg[22] - piCur[22] ); uiSum += abs( piOrg[23] - piCur[23] ); uiSum += abs( piOrg[24] - piCur[24] ); uiSum += abs( piOrg[25] - piCur[25] ); uiSum += abs( piOrg[26] - piCur[26] ); uiSum += abs( piOrg[27] - piCur[27] ); uiSum += abs( piOrg[28] - piCur[28] ); uiSum += abs( piOrg[29] - piCur[29] ); uiSum += abs( piOrg[30] - piCur[30] ); uiSum += abs( piOrg[31] - piCur[31] ); uiSum += abs( piOrg[32] - piCur[32] ); uiSum += abs( piOrg[33] - piCur[33] ); uiSum += abs( piOrg[34] - piCur[34] ); uiSum += abs( piOrg[35] - piCur[35] ); uiSum += abs( piOrg[36] - piCur[36] ); uiSum += abs( piOrg[37] - piCur[37] ); uiSum += abs( piOrg[38] - piCur[38] ); uiSum += abs( piOrg[39] - piCur[39] ); uiSum += abs( piOrg[40] - piCur[40] ); uiSum += abs( piOrg[41] - piCur[41] ); uiSum += abs( piOrg[42] - piCur[42] ); uiSum += abs( piOrg[43] - piCur[43] ); uiSum += abs( piOrg[44] - piCur[44] ); uiSum += abs( piOrg[45] - piCur[45] ); uiSum += abs( piOrg[46] - piCur[46] ); uiSum += abs( piOrg[47] - piCur[47] ); piOrg += iStrideOrg; piCur += iStrideCur; } uiSum <<= iSubShift; return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } // -------------------------------------------------------------------------------------------------------------------- // MRSAD // 
// --------------------------------------------------------------------------------------------------------------------

/**
 * Generic-width mean-removed SAD with early termination.
 *
 * A first pass over every (1 << subShift)-th row computes the mean of (org - cur); a second
 * pass sums |org - cur - mean|. The second pass stops as soon as the adjusted partial sum
 * exceeds maximumDistortionForEarlyExit, returning that partial sum without the
 * sub-sampling scale-up.
 */
Distortion RdCost::xGetMRSAD( const DistParam& rcDtParam )
{
  const int      numCols   = rcDtParam.org.width;
  const int      numRows   = rcDtParam.org.height;
  const int      subShift  = rcDtParam.subShift;
  const int      subStep   = ( 1 << subShift );
  const int      strideCur = rcDtParam.cur.stride * subStep;
  const int      strideOrg = rcDtParam.org.stride * subStep;
  const uint32_t precShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);

  const Pel* org = rcDtParam.org.buf;
  const Pel* cur = rcDtParam.cur.buf;

  // Pass 1: accumulate the signed differences to obtain the mean offset.
  int32_t deltaSum = 0;
  for( int r = numRows; r != 0; r -= subStep )
  {
    for( int x = 0; x < numCols; x++ )
    {
      deltaSum += ( org[x] - cur[x] );
    }
    org += strideOrg;
    cur += strideCur;
  }
  const Pel offset = Pel( deltaSum / ( numCols * ( numRows >> subShift ) ) );

  // Pass 2: absolute differences with the mean removed.
  org = rcDtParam.org.buf;
  cur = rcDtParam.cur.buf;

  Distortion sum = 0;
  for( int r = numRows; r != 0; r -= subStep )
  {
    for( int x = 0; x < numCols; x++ )
    {
      sum += abs( org[x] - cur[x] - offset );
    }
    if( rcDtParam.maximumDistortionForEarlyExit < ( sum >> precShift ) )
    {
      return ( sum >> precShift );
    }
    org += strideOrg;
    cur += strideCur;
  }

  sum <<= subShift;
  return ( sum >> precShift );
}

namespace
{
/**
 * Shared body of the fixed-width mean-removed SAD kernels.
 *
 * Same two passes as the generic kernel, restricted to the first NumCols samples of each
 * visited row and without early termination. NumCols is a compile-time constant so the
 * inner loops have a fixed trip count.
 */
template<int NumCols>
inline Distortion mrsadFixedWidth( const DistParam& rcDtParam )
{
  const int numRows   = rcDtParam.org.height;
  const int subShift  = rcDtParam.subShift;
  const int subStep   = ( 1 << subShift );
  const int strideCur = rcDtParam.cur.stride * subStep;
  const int strideOrg = rcDtParam.org.stride * subStep;

  const Pel* org = rcDtParam.org.buf;
  const Pel* cur = rcDtParam.cur.buf;

  // Pass 1: mean of the signed differences.
  int32_t deltaSum = 0;
  for( int r = numRows; r != 0; r -= subStep )
  {
    for( int x = 0; x < NumCols; x++ )
    {
      deltaSum += ( org[x] - cur[x] );
    }
    org += strideOrg;
    cur += strideCur;
  }
  const Pel offset = Pel( deltaSum / ( NumCols * ( numRows >> subShift ) ) );

  // Pass 2: absolute differences with the mean removed.
  org = rcDtParam.org.buf;
  cur = rcDtParam.cur.buf;

  Distortion sum = 0;
  for( int r = numRows; r != 0; r -= subStep )
  {
    for( int x = 0; x < NumCols; x++ )
    {
      sum += abs( org[x] - cur[x] - offset );
    }
    org += strideOrg;
    cur += strideCur;
  }

  sum <<= subShift;
  return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth));
}
} // namespace

/** Mean-removed SAD over 4 columns. */
Distortion RdCost::xGetMRSAD4( const DistParam& rcDtParam )
{
  return mrsadFixedWidth<4>( rcDtParam );
}

/** Mean-removed SAD over 8 columns. */
Distortion RdCost::xGetMRSAD8( const DistParam& rcDtParam )
{
  return mrsadFixedWidth<8>( rcDtParam );
}

/** Mean-removed SAD over 16 columns. */
Distortion RdCost::xGetMRSAD16( const DistParam& rcDtParam )
{
  return mrsadFixedWidth<16>( rcDtParam );
}

/** Mean-removed SAD over 12 columns. */
Distortion RdCost::xGetMRSAD12( const DistParam& rcDtParam )
{
  return mrsadFixedWidth<12>( rcDtParam );
}

Distortion RdCost::xGetMRSAD16N( const DistParam &rcDtParam )
{
  const Pel* piOrg = rcDtParam.org.buf;
  const Pel* piCur =
rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iCols = rcDtParam.org.width; int iSubShift = rcDtParam.subShift; int iSubStep = ( 1 << iSubShift ); int iStrideCur = rcDtParam.cur.stride * iSubStep; int iStrideOrg = rcDtParam.org.stride * iSubStep; int32_t deltaSum = 0; for( int r = iRows; r != 0; r-=iSubStep, piOrg += iStrideOrg, piCur += iStrideCur ) { for( int n = 0; n < iCols; n += 16 ) { deltaSum += ( piOrg[n+ 0] - piCur[n+ 0] ); deltaSum += ( piOrg[n+ 1] - piCur[n+ 1] ); deltaSum += ( piOrg[n+ 2] - piCur[n+ 2] ); deltaSum += ( piOrg[n+ 3] - piCur[n+ 3] ); deltaSum += ( piOrg[n+ 4] - piCur[n+ 4] ); deltaSum += ( piOrg[n+ 5] - piCur[n+ 5] ); deltaSum += ( piOrg[n+ 6] - piCur[n+ 6] ); deltaSum += ( piOrg[n+ 7] - piCur[n+ 7] ); deltaSum += ( piOrg[n+ 8] - piCur[n+ 8] ); deltaSum += ( piOrg[n+ 9] - piCur[n+ 9] ); deltaSum += ( piOrg[n+10] - piCur[n+10] ); deltaSum += ( piOrg[n+11] - piCur[n+11] ); deltaSum += ( piOrg[n+12] - piCur[n+12] ); deltaSum += ( piOrg[n+13] - piCur[n+13] ); deltaSum += ( piOrg[n+14] - piCur[n+14] ); deltaSum += ( piOrg[n+15] - piCur[n+15] ); } } const Pel offset = Pel( deltaSum / ( iCols * ( iRows >> iSubShift ) ) ); piOrg = rcDtParam.org.buf; piCur = rcDtParam.cur.buf; Distortion uiSum = 0; for( ; iRows != 0; iRows-=iSubStep ) { for (int n = 0; n < iCols; n+=16 ) { uiSum += abs( piOrg[n+ 0] - piCur[n+ 0] - offset ); uiSum += abs( piOrg[n+ 1] - piCur[n+ 1] - offset ); uiSum += abs( piOrg[n+ 2] - piCur[n+ 2] - offset ); uiSum += abs( piOrg[n+ 3] - piCur[n+ 3] - offset ); uiSum += abs( piOrg[n+ 4] - piCur[n+ 4] - offset ); uiSum += abs( piOrg[n+ 5] - piCur[n+ 5] - offset ); uiSum += abs( piOrg[n+ 6] - piCur[n+ 6] - offset ); uiSum += abs( piOrg[n+ 7] - piCur[n+ 7] - offset ); uiSum += abs( piOrg[n+ 8] - piCur[n+ 8] - offset ); uiSum += abs( piOrg[n+ 9] - piCur[n+ 9] - offset ); uiSum += abs( piOrg[n+10] - piCur[n+10] - offset ); uiSum += abs( piOrg[n+11] - piCur[n+11] - offset ); uiSum += abs( piOrg[n+12] - piCur[n+12] - offset ); 
uiSum += abs( piOrg[n+13] - piCur[n+13] - offset ); uiSum += abs( piOrg[n+14] - piCur[n+14] - offset ); uiSum += abs( piOrg[n+15] - piCur[n+15] - offset ); } piOrg += iStrideOrg; piCur += iStrideCur; } uiSum <<= iSubShift; return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetMRSAD32( const DistParam &rcDtParam ) { const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iSubShift = rcDtParam.subShift; int iSubStep = ( 1 << iSubShift ); int iStrideCur = rcDtParam.cur.stride * iSubStep; int iStrideOrg = rcDtParam.org.stride * iSubStep; int32_t deltaSum = 0; for( int r = iRows; r != 0; r-=iSubStep, piOrg += iStrideOrg, piCur += iStrideCur ) { deltaSum += ( piOrg[ 0] - piCur[ 0] ); deltaSum += ( piOrg[ 1] - piCur[ 1] ); deltaSum += ( piOrg[ 2] - piCur[ 2] ); deltaSum += ( piOrg[ 3] - piCur[ 3] ); deltaSum += ( piOrg[ 4] - piCur[ 4] ); deltaSum += ( piOrg[ 5] - piCur[ 5] ); deltaSum += ( piOrg[ 6] - piCur[ 6] ); deltaSum += ( piOrg[ 7] - piCur[ 7] ); deltaSum += ( piOrg[ 8] - piCur[ 8] ); deltaSum += ( piOrg[ 9] - piCur[ 9] ); deltaSum += ( piOrg[10] - piCur[10] ); deltaSum += ( piOrg[11] - piCur[11] ); deltaSum += ( piOrg[12] - piCur[12] ); deltaSum += ( piOrg[13] - piCur[13] ); deltaSum += ( piOrg[14] - piCur[14] ); deltaSum += ( piOrg[15] - piCur[15] ); deltaSum += ( piOrg[16] - piCur[16] ); deltaSum += ( piOrg[17] - piCur[17] ); deltaSum += ( piOrg[18] - piCur[18] ); deltaSum += ( piOrg[19] - piCur[19] ); deltaSum += ( piOrg[20] - piCur[20] ); deltaSum += ( piOrg[21] - piCur[21] ); deltaSum += ( piOrg[22] - piCur[22] ); deltaSum += ( piOrg[23] - piCur[23] ); deltaSum += ( piOrg[24] - piCur[24] ); deltaSum += ( piOrg[25] - piCur[25] ); deltaSum += ( piOrg[26] - piCur[26] ); deltaSum += ( piOrg[27] - piCur[27] ); deltaSum += ( piOrg[28] - piCur[28] ); deltaSum += ( piOrg[29] - piCur[29] ); deltaSum += ( piOrg[30] - piCur[30] ); deltaSum += ( piOrg[31] - piCur[31] ); } 
const Pel offset = Pel( deltaSum / ( 32 * ( iRows >> iSubShift ) ) ); piOrg = rcDtParam.org.buf; piCur = rcDtParam.cur.buf; Distortion uiSum = 0; for( ; iRows != 0; iRows-=iSubStep ) { uiSum += abs( piOrg[ 0] - piCur[ 0] - offset ); uiSum += abs( piOrg[ 1] - piCur[ 1] - offset ); uiSum += abs( piOrg[ 2] - piCur[ 2] - offset ); uiSum += abs( piOrg[ 3] - piCur[ 3] - offset ); uiSum += abs( piOrg[ 4] - piCur[ 4] - offset ); uiSum += abs( piOrg[ 5] - piCur[ 5] - offset ); uiSum += abs( piOrg[ 6] - piCur[ 6] - offset ); uiSum += abs( piOrg[ 7] - piCur[ 7] - offset ); uiSum += abs( piOrg[ 8] - piCur[ 8] - offset ); uiSum += abs( piOrg[ 9] - piCur[ 9] - offset ); uiSum += abs( piOrg[10] - piCur[10] - offset ); uiSum += abs( piOrg[11] - piCur[11] - offset ); uiSum += abs( piOrg[12] - piCur[12] - offset ); uiSum += abs( piOrg[13] - piCur[13] - offset ); uiSum += abs( piOrg[14] - piCur[14] - offset ); uiSum += abs( piOrg[15] - piCur[15] - offset ); uiSum += abs( piOrg[16] - piCur[16] - offset ); uiSum += abs( piOrg[17] - piCur[17] - offset ); uiSum += abs( piOrg[18] - piCur[18] - offset ); uiSum += abs( piOrg[19] - piCur[19] - offset ); uiSum += abs( piOrg[20] - piCur[20] - offset ); uiSum += abs( piOrg[21] - piCur[21] - offset ); uiSum += abs( piOrg[22] - piCur[22] - offset ); uiSum += abs( piOrg[23] - piCur[23] - offset ); uiSum += abs( piOrg[24] - piCur[24] - offset ); uiSum += abs( piOrg[25] - piCur[25] - offset ); uiSum += abs( piOrg[26] - piCur[26] - offset ); uiSum += abs( piOrg[27] - piCur[27] - offset ); uiSum += abs( piOrg[28] - piCur[28] - offset ); uiSum += abs( piOrg[29] - piCur[29] - offset ); uiSum += abs( piOrg[30] - piCur[30] - offset ); uiSum += abs( piOrg[31] - piCur[31] - offset ); piOrg += iStrideOrg; piCur += iStrideCur; } uiSum <<= iSubShift; return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetMRSAD24( const DistParam &rcDtParam ) { const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; 
int iRows = rcDtParam.org.height; int iSubShift = rcDtParam.subShift; int iSubStep = ( 1 << iSubShift ); int iStrideCur = rcDtParam.cur.stride * iSubStep; int iStrideOrg = rcDtParam.org.stride * iSubStep; int32_t deltaSum = 0; for( int r = iRows; r != 0; r-=iSubStep, piOrg += iStrideOrg, piCur += iStrideCur ) { deltaSum += ( piOrg[ 0] - piCur[ 0] ); deltaSum += ( piOrg[ 1] - piCur[ 1] ); deltaSum += ( piOrg[ 2] - piCur[ 2] ); deltaSum += ( piOrg[ 3] - piCur[ 3] ); deltaSum += ( piOrg[ 4] - piCur[ 4] ); deltaSum += ( piOrg[ 5] - piCur[ 5] ); deltaSum += ( piOrg[ 6] - piCur[ 6] ); deltaSum += ( piOrg[ 7] - piCur[ 7] ); deltaSum += ( piOrg[ 8] - piCur[ 8] ); deltaSum += ( piOrg[ 9] - piCur[ 9] ); deltaSum += ( piOrg[10] - piCur[10] ); deltaSum += ( piOrg[11] - piCur[11] ); deltaSum += ( piOrg[12] - piCur[12] ); deltaSum += ( piOrg[13] - piCur[13] ); deltaSum += ( piOrg[14] - piCur[14] ); deltaSum += ( piOrg[15] - piCur[15] ); deltaSum += ( piOrg[16] - piCur[16] ); deltaSum += ( piOrg[17] - piCur[17] ); deltaSum += ( piOrg[18] - piCur[18] ); deltaSum += ( piOrg[19] - piCur[19] ); deltaSum += ( piOrg[20] - piCur[20] ); deltaSum += ( piOrg[21] - piCur[21] ); deltaSum += ( piOrg[22] - piCur[22] ); deltaSum += ( piOrg[23] - piCur[23] ); } const Pel offset = Pel( deltaSum / ( 24 * ( iRows >> iSubShift ) ) ); piOrg = rcDtParam.org.buf; piCur = rcDtParam.cur.buf; Distortion uiSum = 0; for( ; iRows != 0; iRows-=iSubStep ) { uiSum += abs( piOrg[ 0] - piCur[ 0] - offset ); uiSum += abs( piOrg[ 1] - piCur[ 1] - offset ); uiSum += abs( piOrg[ 2] - piCur[ 2] - offset ); uiSum += abs( piOrg[ 3] - piCur[ 3] - offset ); uiSum += abs( piOrg[ 4] - piCur[ 4] - offset ); uiSum += abs( piOrg[ 5] - piCur[ 5] - offset ); uiSum += abs( piOrg[ 6] - piCur[ 6] - offset ); uiSum += abs( piOrg[ 7] - piCur[ 7] - offset ); uiSum += abs( piOrg[ 8] - piCur[ 8] - offset ); uiSum += abs( piOrg[ 9] - piCur[ 9] - offset ); uiSum += abs( piOrg[10] - piCur[10] - offset ); uiSum += abs( piOrg[11] - piCur[11] 
- offset ); uiSum += abs( piOrg[12] - piCur[12] - offset ); uiSum += abs( piOrg[13] - piCur[13] - offset ); uiSum += abs( piOrg[14] - piCur[14] - offset ); uiSum += abs( piOrg[15] - piCur[15] - offset ); uiSum += abs( piOrg[16] - piCur[16] - offset ); uiSum += abs( piOrg[17] - piCur[17] - offset ); uiSum += abs( piOrg[18] - piCur[18] - offset ); uiSum += abs( piOrg[19] - piCur[19] - offset ); uiSum += abs( piOrg[20] - piCur[20] - offset ); uiSum += abs( piOrg[21] - piCur[21] - offset ); uiSum += abs( piOrg[22] - piCur[22] - offset ); uiSum += abs( piOrg[23] - piCur[23] - offset ); piOrg += iStrideOrg; piCur += iStrideCur; } uiSum <<= iSubShift; return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetMRSAD64( const DistParam &rcDtParam ) { const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iSubShift = rcDtParam.subShift; int iSubStep = ( 1 << iSubShift ); int iStrideCur = rcDtParam.cur.stride * iSubStep; int iStrideOrg = rcDtParam.org.stride * iSubStep; int32_t deltaSum = 0; for( int r = iRows; r != 0; r-=iSubStep, piOrg += iStrideOrg, piCur += iStrideCur ) { deltaSum += ( piOrg[ 0] - piCur[ 0] ); deltaSum += ( piOrg[ 1] - piCur[ 1] ); deltaSum += ( piOrg[ 2] - piCur[ 2] ); deltaSum += ( piOrg[ 3] - piCur[ 3] ); deltaSum += ( piOrg[ 4] - piCur[ 4] ); deltaSum += ( piOrg[ 5] - piCur[ 5] ); deltaSum += ( piOrg[ 6] - piCur[ 6] ); deltaSum += ( piOrg[ 7] - piCur[ 7] ); deltaSum += ( piOrg[ 8] - piCur[ 8] ); deltaSum += ( piOrg[ 9] - piCur[ 9] ); deltaSum += ( piOrg[10] - piCur[10] ); deltaSum += ( piOrg[11] - piCur[11] ); deltaSum += ( piOrg[12] - piCur[12] ); deltaSum += ( piOrg[13] - piCur[13] ); deltaSum += ( piOrg[14] - piCur[14] ); deltaSum += ( piOrg[15] - piCur[15] ); deltaSum += ( piOrg[16] - piCur[16] ); deltaSum += ( piOrg[17] - piCur[17] ); deltaSum += ( piOrg[18] - piCur[18] ); deltaSum += ( piOrg[19] - piCur[19] ); deltaSum += ( piOrg[20] - piCur[20] ); 
deltaSum += ( piOrg[21] - piCur[21] ); deltaSum += ( piOrg[22] - piCur[22] ); deltaSum += ( piOrg[23] - piCur[23] ); deltaSum += ( piOrg[24] - piCur[24] ); deltaSum += ( piOrg[25] - piCur[25] ); deltaSum += ( piOrg[26] - piCur[26] ); deltaSum += ( piOrg[27] - piCur[27] ); deltaSum += ( piOrg[28] - piCur[28] ); deltaSum += ( piOrg[29] - piCur[29] ); deltaSum += ( piOrg[30] - piCur[30] ); deltaSum += ( piOrg[31] - piCur[31] ); deltaSum += ( piOrg[32] - piCur[32] ); deltaSum += ( piOrg[33] - piCur[33] ); deltaSum += ( piOrg[34] - piCur[34] ); deltaSum += ( piOrg[35] - piCur[35] ); deltaSum += ( piOrg[36] - piCur[36] ); deltaSum += ( piOrg[37] - piCur[37] ); deltaSum += ( piOrg[38] - piCur[38] ); deltaSum += ( piOrg[39] - piCur[39] ); deltaSum += ( piOrg[40] - piCur[40] ); deltaSum += ( piOrg[41] - piCur[41] ); deltaSum += ( piOrg[42] - piCur[42] ); deltaSum += ( piOrg[43] - piCur[43] ); deltaSum += ( piOrg[44] - piCur[44] ); deltaSum += ( piOrg[45] - piCur[45] ); deltaSum += ( piOrg[46] - piCur[46] ); deltaSum += ( piOrg[47] - piCur[47] ); deltaSum += ( piOrg[48] - piCur[48] ); deltaSum += ( piOrg[49] - piCur[49] ); deltaSum += ( piOrg[50] - piCur[50] ); deltaSum += ( piOrg[51] - piCur[51] ); deltaSum += ( piOrg[52] - piCur[52] ); deltaSum += ( piOrg[53] - piCur[53] ); deltaSum += ( piOrg[54] - piCur[54] ); deltaSum += ( piOrg[55] - piCur[55] ); deltaSum += ( piOrg[56] - piCur[56] ); deltaSum += ( piOrg[57] - piCur[57] ); deltaSum += ( piOrg[58] - piCur[58] ); deltaSum += ( piOrg[59] - piCur[59] ); deltaSum += ( piOrg[60] - piCur[60] ); deltaSum += ( piOrg[61] - piCur[61] ); deltaSum += ( piOrg[62] - piCur[62] ); deltaSum += ( piOrg[63] - piCur[63] ); } const Pel offset = Pel( deltaSum / ( 64 * ( iRows >> iSubShift ) ) ); piOrg = rcDtParam.org.buf; piCur = rcDtParam.cur.buf; Distortion uiSum = 0; for( ; iRows != 0; iRows-=iSubStep ) { uiSum += abs( piOrg[ 0] - piCur[ 0] - offset ); uiSum += abs( piOrg[ 1] - piCur[ 1] - offset ); uiSum += abs( piOrg[ 2] - piCur[ 2] - 
offset ); uiSum += abs( piOrg[ 3] - piCur[ 3] - offset ); uiSum += abs( piOrg[ 4] - piCur[ 4] - offset ); uiSum += abs( piOrg[ 5] - piCur[ 5] - offset ); uiSum += abs( piOrg[ 6] - piCur[ 6] - offset ); uiSum += abs( piOrg[ 7] - piCur[ 7] - offset ); uiSum += abs( piOrg[ 8] - piCur[ 8] - offset ); uiSum += abs( piOrg[ 9] - piCur[ 9] - offset ); uiSum += abs( piOrg[10] - piCur[10] - offset ); uiSum += abs( piOrg[11] - piCur[11] - offset ); uiSum += abs( piOrg[12] - piCur[12] - offset ); uiSum += abs( piOrg[13] - piCur[13] - offset ); uiSum += abs( piOrg[14] - piCur[14] - offset ); uiSum += abs( piOrg[15] - piCur[15] - offset ); uiSum += abs( piOrg[16] - piCur[16] - offset ); uiSum += abs( piOrg[17] - piCur[17] - offset ); uiSum += abs( piOrg[18] - piCur[18] - offset ); uiSum += abs( piOrg[19] - piCur[19] - offset ); uiSum += abs( piOrg[20] - piCur[20] - offset ); uiSum += abs( piOrg[21] - piCur[21] - offset ); uiSum += abs( piOrg[22] - piCur[22] - offset ); uiSum += abs( piOrg[23] - piCur[23] - offset ); uiSum += abs( piOrg[24] - piCur[24] - offset ); uiSum += abs( piOrg[25] - piCur[25] - offset ); uiSum += abs( piOrg[26] - piCur[26] - offset ); uiSum += abs( piOrg[27] - piCur[27] - offset ); uiSum += abs( piOrg[28] - piCur[28] - offset ); uiSum += abs( piOrg[29] - piCur[29] - offset ); uiSum += abs( piOrg[30] - piCur[30] - offset ); uiSum += abs( piOrg[31] - piCur[31] - offset ); uiSum += abs( piOrg[32] - piCur[32] - offset ); uiSum += abs( piOrg[33] - piCur[33] - offset ); uiSum += abs( piOrg[34] - piCur[34] - offset ); uiSum += abs( piOrg[35] - piCur[35] - offset ); uiSum += abs( piOrg[36] - piCur[36] - offset ); uiSum += abs( piOrg[37] - piCur[37] - offset ); uiSum += abs( piOrg[38] - piCur[38] - offset ); uiSum += abs( piOrg[39] - piCur[39] - offset ); uiSum += abs( piOrg[40] - piCur[40] - offset ); uiSum += abs( piOrg[41] - piCur[41] - offset ); uiSum += abs( piOrg[42] - piCur[42] - offset ); uiSum += abs( piOrg[43] - piCur[43] - offset ); uiSum += abs( 
piOrg[44] - piCur[44] - offset ); uiSum += abs( piOrg[45] - piCur[45] - offset ); uiSum += abs( piOrg[46] - piCur[46] - offset ); uiSum += abs( piOrg[47] - piCur[47] - offset ); uiSum += abs( piOrg[48] - piCur[48] - offset ); uiSum += abs( piOrg[49] - piCur[49] - offset ); uiSum += abs( piOrg[50] - piCur[50] - offset ); uiSum += abs( piOrg[51] - piCur[51] - offset ); uiSum += abs( piOrg[52] - piCur[52] - offset ); uiSum += abs( piOrg[53] - piCur[53] - offset ); uiSum += abs( piOrg[54] - piCur[54] - offset ); uiSum += abs( piOrg[55] - piCur[55] - offset ); uiSum += abs( piOrg[56] - piCur[56] - offset ); uiSum += abs( piOrg[57] - piCur[57] - offset ); uiSum += abs( piOrg[58] - piCur[58] - offset ); uiSum += abs( piOrg[59] - piCur[59] - offset ); uiSum += abs( piOrg[60] - piCur[60] - offset ); uiSum += abs( piOrg[61] - piCur[61] - offset ); uiSum += abs( piOrg[62] - piCur[62] - offset ); uiSum += abs( piOrg[63] - piCur[63] - offset ); piOrg += iStrideOrg; piCur += iStrideCur; } uiSum <<= iSubShift; return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetMRSAD48( const DistParam &rcDtParam ) { const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iSubShift = rcDtParam.subShift; int iSubStep = ( 1 << iSubShift ); int iStrideCur = rcDtParam.cur.stride * iSubStep; int iStrideOrg = rcDtParam.org.stride * iSubStep; int32_t deltaSum = 0; for( int r = iRows; r != 0; r-=iSubStep, piOrg += iStrideOrg, piCur += iStrideCur ) { deltaSum += ( piOrg[ 0] - piCur[ 0] ); deltaSum += ( piOrg[ 1] - piCur[ 1] ); deltaSum += ( piOrg[ 2] - piCur[ 2] ); deltaSum += ( piOrg[ 3] - piCur[ 3] ); deltaSum += ( piOrg[ 4] - piCur[ 4] ); deltaSum += ( piOrg[ 5] - piCur[ 5] ); deltaSum += ( piOrg[ 6] - piCur[ 6] ); deltaSum += ( piOrg[ 7] - piCur[ 7] ); deltaSum += ( piOrg[ 8] - piCur[ 8] ); deltaSum += ( piOrg[ 9] - piCur[ 9] ); deltaSum += ( piOrg[10] - piCur[10] ); deltaSum += ( piOrg[11] - 
piCur[11] ); deltaSum += ( piOrg[12] - piCur[12] ); deltaSum += ( piOrg[13] - piCur[13] ); deltaSum += ( piOrg[14] - piCur[14] ); deltaSum += ( piOrg[15] - piCur[15] ); deltaSum += ( piOrg[16] - piCur[16] ); deltaSum += ( piOrg[17] - piCur[17] ); deltaSum += ( piOrg[18] - piCur[18] ); deltaSum += ( piOrg[19] - piCur[19] ); deltaSum += ( piOrg[20] - piCur[20] ); deltaSum += ( piOrg[21] - piCur[21] ); deltaSum += ( piOrg[22] - piCur[22] ); deltaSum += ( piOrg[23] - piCur[23] ); deltaSum += ( piOrg[24] - piCur[24] ); deltaSum += ( piOrg[25] - piCur[25] ); deltaSum += ( piOrg[26] - piCur[26] ); deltaSum += ( piOrg[27] - piCur[27] ); deltaSum += ( piOrg[28] - piCur[28] ); deltaSum += ( piOrg[29] - piCur[29] ); deltaSum += ( piOrg[30] - piCur[30] ); deltaSum += ( piOrg[31] - piCur[31] ); deltaSum += ( piOrg[32] - piCur[32] ); deltaSum += ( piOrg[33] - piCur[33] ); deltaSum += ( piOrg[34] - piCur[34] ); deltaSum += ( piOrg[35] - piCur[35] ); deltaSum += ( piOrg[36] - piCur[36] ); deltaSum += ( piOrg[37] - piCur[37] ); deltaSum += ( piOrg[38] - piCur[38] ); deltaSum += ( piOrg[39] - piCur[39] ); deltaSum += ( piOrg[40] - piCur[40] ); deltaSum += ( piOrg[41] - piCur[41] ); deltaSum += ( piOrg[42] - piCur[42] ); deltaSum += ( piOrg[43] - piCur[43] ); deltaSum += ( piOrg[44] - piCur[44] ); deltaSum += ( piOrg[45] - piCur[45] ); deltaSum += ( piOrg[46] - piCur[46] ); deltaSum += ( piOrg[47] - piCur[47] ); } const Pel offset = Pel( deltaSum / ( 48 * ( iRows >> iSubShift ) ) ); piOrg = rcDtParam.org.buf; piCur = rcDtParam.cur.buf; Distortion uiSum = 0; for( ; iRows != 0; iRows-=iSubStep ) { uiSum += abs( piOrg[ 0] - piCur[ 0] - offset ); uiSum += abs( piOrg[ 1] - piCur[ 1] - offset ); uiSum += abs( piOrg[ 2] - piCur[ 2] - offset ); uiSum += abs( piOrg[ 3] - piCur[ 3] - offset ); uiSum += abs( piOrg[ 4] - piCur[ 4] - offset ); uiSum += abs( piOrg[ 5] - piCur[ 5] - offset ); uiSum += abs( piOrg[ 6] - piCur[ 6] - offset ); uiSum += abs( piOrg[ 7] - piCur[ 7] - offset ); uiSum += 
abs( piOrg[ 8] - piCur[ 8] - offset ); uiSum += abs( piOrg[ 9] - piCur[ 9] - offset ); uiSum += abs( piOrg[10] - piCur[10] - offset ); uiSum += abs( piOrg[11] - piCur[11] - offset ); uiSum += abs( piOrg[12] - piCur[12] - offset ); uiSum += abs( piOrg[13] - piCur[13] - offset ); uiSum += abs( piOrg[14] - piCur[14] - offset ); uiSum += abs( piOrg[15] - piCur[15] - offset ); uiSum += abs( piOrg[16] - piCur[16] - offset ); uiSum += abs( piOrg[17] - piCur[17] - offset ); uiSum += abs( piOrg[18] - piCur[18] - offset ); uiSum += abs( piOrg[19] - piCur[19] - offset ); uiSum += abs( piOrg[20] - piCur[20] - offset ); uiSum += abs( piOrg[21] - piCur[21] - offset ); uiSum += abs( piOrg[22] - piCur[22] - offset ); uiSum += abs( piOrg[23] - piCur[23] - offset ); uiSum += abs( piOrg[24] - piCur[24] - offset ); uiSum += abs( piOrg[25] - piCur[25] - offset ); uiSum += abs( piOrg[26] - piCur[26] - offset ); uiSum += abs( piOrg[27] - piCur[27] - offset ); uiSum += abs( piOrg[28] - piCur[28] - offset ); uiSum += abs( piOrg[29] - piCur[29] - offset ); uiSum += abs( piOrg[30] - piCur[30] - offset ); uiSum += abs( piOrg[31] - piCur[31] - offset ); uiSum += abs( piOrg[32] - piCur[32] - offset ); uiSum += abs( piOrg[33] - piCur[33] - offset ); uiSum += abs( piOrg[34] - piCur[34] - offset ); uiSum += abs( piOrg[35] - piCur[35] - offset ); uiSum += abs( piOrg[36] - piCur[36] - offset ); uiSum += abs( piOrg[37] - piCur[37] - offset ); uiSum += abs( piOrg[38] - piCur[38] - offset ); uiSum += abs( piOrg[39] - piCur[39] - offset ); uiSum += abs( piOrg[40] - piCur[40] - offset ); uiSum += abs( piOrg[41] - piCur[41] - offset ); uiSum += abs( piOrg[42] - piCur[42] - offset ); uiSum += abs( piOrg[43] - piCur[43] - offset ); uiSum += abs( piOrg[44] - piCur[44] - offset ); uiSum += abs( piOrg[45] - piCur[45] - offset ); uiSum += abs( piOrg[46] - piCur[46] - offset ); uiSum += abs( piOrg[47] - piCur[47] - offset ); piOrg += iStrideOrg; piCur += iStrideCur; } uiSum <<= iSubShift; return (uiSum >> 
DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } // -------------------------------------------------------------------------------------------------------------------- // SSE // -------------------------------------------------------------------------------------------------------------------- Distortion RdCost::xGetSSE( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iCols = rcDtParam.org.width; int iStrideCur = rcDtParam.cur.stride; int iStrideOrg = rcDtParam.org.stride; Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; Intermediate_Int iTemp; for( ; iRows != 0; iRows-- ) { for (int n = 0; n < iCols; n++ ) { iTemp = piOrg[n ] - piCur[n ]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); } piOrg += iStrideOrg; piCur += iStrideCur; } return ( uiSum ); } Distortion RdCost::xGetSSE4( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 4, "Invalid size" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iStrideOrg = rcDtParam.org.stride; int iStrideCur = rcDtParam.cur.stride; Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; Intermediate_Int iTemp; for( ; iRows != 0; iRows-- ) { iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; } return ( uiSum ); } Distortion RdCost::xGetSSE8( const DistParam &rcDtParam ) { if ( 
rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 8, "Invalid size" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iStrideOrg = rcDtParam.org.stride; int iStrideCur = rcDtParam.cur.stride; Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; Intermediate_Int iTemp; for( ; iRows != 0; iRows-- ) { iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[4] - piCur[4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[5] - piCur[5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[6] - piCur[6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[7] - piCur[7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; } return ( uiSum ); } Distortion RdCost::xGetSSE16( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 16, "Invalid size" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iStrideOrg = rcDtParam.org.stride; int iStrideCur = rcDtParam.cur.stride; Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; Intermediate_Int iTemp; for( ; iRows != 0; iRows-- ) { iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 3] - piCur[ 3]; 
uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; } return ( uiSum ); } Distortion RdCost::xGetSSE16N( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iCols = rcDtParam.org.width; int iStrideOrg = rcDtParam.org.stride; int iStrideCur = rcDtParam.cur.stride; Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; Intermediate_Int iTemp; for( ; iRows != 0; iRows-- ) { for (int n = 0; n < iCols; n+=16 ) { iTemp = piOrg[n+ 0] - piCur[n+ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[n+ 1] - piCur[n+ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[n+ 2] - piCur[n+ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[n+ 3] - piCur[n+ 3]; uiSum += Distortion(( iTemp * iTemp ) >> 
uiShift); iTemp = piOrg[n+ 4] - piCur[n+ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[n+ 5] - piCur[n+ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[n+ 6] - piCur[n+ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[n+ 7] - piCur[n+ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[n+ 8] - piCur[n+ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[n+ 9] - piCur[n+ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[n+10] - piCur[n+10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[n+11] - piCur[n+11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[n+12] - piCur[n+12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[n+13] - piCur[n+13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[n+14] - piCur[n+14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[n+15] - piCur[n+15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); } piOrg += iStrideOrg; piCur += iStrideCur; } return ( uiSum ); } Distortion RdCost::xGetSSE32( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 32, "Invalid size" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iStrideOrg = rcDtParam.org.stride; int iStrideCur = rcDtParam.cur.stride; Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; Intermediate_Int iTemp; for( ; iRows != 0; iRows-- ) { iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 4] - 
piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[28] - piCur[28]; uiSum += 
Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; } return ( uiSum ); } Distortion RdCost::xGetSSE64( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 64, "Invalid size" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int iRows = rcDtParam.org.height; int iStrideOrg = rcDtParam.org.stride; int iStrideCur = rcDtParam.cur.stride; Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; Intermediate_Int iTemp; for( ; iRows != 0; iRows-- ) { iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[13] - piCur[13]; uiSum += 
Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[32] - piCur[32]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[33] - piCur[33]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[34] - piCur[34]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[35] - piCur[35]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[36] - piCur[36]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[37] - piCur[37]; uiSum += Distortion(( iTemp * iTemp ) >> 
uiShift); iTemp = piOrg[38] - piCur[38]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[39] - piCur[39]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[40] - piCur[40]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[41] - piCur[41]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[42] - piCur[42]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[43] - piCur[43]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[44] - piCur[44]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[45] - piCur[45]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[46] - piCur[46]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[47] - piCur[47]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[48] - piCur[48]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[49] - piCur[49]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[50] - piCur[50]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[51] - piCur[51]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[52] - piCur[52]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[53] - piCur[53]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[54] - piCur[54]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[55] - piCur[55]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[56] - piCur[56]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[57] - piCur[57]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[58] - piCur[58]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[59] - piCur[59]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[60] - piCur[60]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[61] - piCur[61]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[62] - 
piCur[62]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); iTemp = piOrg[63] - piCur[63]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; } return ( uiSum ); } // -------------------------------------------------------------------------------------------------------------------- // HADAMARD with step (used in fractional search) // -------------------------------------------------------------------------------------------------------------------- Distortion RdCost::xCalcHADs2x2( const Pel *piOrg, const Pel *piCur, int iStrideOrg, int iStrideCur, int iStep ) { Distortion satd = 0; TCoeff diff[4], m[4]; CHECK( iStep != 1, "Invalid step" ); diff[0] = piOrg[0 ] - piCur[0]; diff[1] = piOrg[1 ] - piCur[1]; diff[2] = piOrg[iStrideOrg ] - piCur[0 + iStrideCur]; diff[3] = piOrg[iStrideOrg + 1] - piCur[1 + iStrideCur]; m[0] = diff[0] + diff[2]; m[1] = diff[1] + diff[3]; m[2] = diff[0] - diff[2]; m[3] = diff[1] - diff[3]; satd += abs(m[0] + m[1]); satd += abs(m[0] - m[1]); satd += abs(m[2] + m[3]); satd += abs(m[2] - m[3]); return satd; } Distortion RdCost::xCalcHADs4x4( const Pel *piOrg, const Pel *piCur, int iStrideOrg, int iStrideCur, int iStep ) { int k; Distortion satd = 0; TCoeff diff[16], m[16], d[16]; CHECK( iStep != 1, "Invalid step" ); for( k = 0; k < 16; k+=4 ) { diff[k+0] = piOrg[0] - piCur[0]; diff[k+1] = piOrg[1] - piCur[1]; diff[k+2] = piOrg[2] - piCur[2]; diff[k+3] = piOrg[3] - piCur[3]; piCur += iStrideCur; piOrg += iStrideOrg; } /*===== hadamard transform =====*/ m[ 0] = diff[ 0] + diff[12]; m[ 1] = diff[ 1] + diff[13]; m[ 2] = diff[ 2] + diff[14]; m[ 3] = diff[ 3] + diff[15]; m[ 4] = diff[ 4] + diff[ 8]; m[ 5] = diff[ 5] + diff[ 9]; m[ 6] = diff[ 6] + diff[10]; m[ 7] = diff[ 7] + diff[11]; m[ 8] = diff[ 4] - diff[ 8]; m[ 9] = diff[ 5] - diff[ 9]; m[10] = diff[ 6] - diff[10]; m[11] = diff[ 7] - diff[11]; m[12] = diff[ 0] - diff[12]; m[13] = diff[ 1] - diff[13]; m[14] = diff[ 2] - diff[14]; m[15] = diff[ 3] - 
diff[15]; d[ 0] = m[ 0] + m[ 4]; d[ 1] = m[ 1] + m[ 5]; d[ 2] = m[ 2] + m[ 6]; d[ 3] = m[ 3] + m[ 7]; d[ 4] = m[ 8] + m[12]; d[ 5] = m[ 9] + m[13]; d[ 6] = m[10] + m[14]; d[ 7] = m[11] + m[15]; d[ 8] = m[ 0] - m[ 4]; d[ 9] = m[ 1] - m[ 5]; d[10] = m[ 2] - m[ 6]; d[11] = m[ 3] - m[ 7]; d[12] = m[12] - m[ 8]; d[13] = m[13] - m[ 9]; d[14] = m[14] - m[10]; d[15] = m[15] - m[11]; m[ 0] = d[ 0] + d[ 3]; m[ 1] = d[ 1] + d[ 2]; m[ 2] = d[ 1] - d[ 2]; m[ 3] = d[ 0] - d[ 3]; m[ 4] = d[ 4] + d[ 7]; m[ 5] = d[ 5] + d[ 6]; m[ 6] = d[ 5] - d[ 6]; m[ 7] = d[ 4] - d[ 7]; m[ 8] = d[ 8] + d[11]; m[ 9] = d[ 9] + d[10]; m[10] = d[ 9] - d[10]; m[11] = d[ 8] - d[11]; m[12] = d[12] + d[15]; m[13] = d[13] + d[14]; m[14] = d[13] - d[14]; m[15] = d[12] - d[15]; d[ 0] = m[ 0] + m[ 1]; d[ 1] = m[ 0] - m[ 1]; d[ 2] = m[ 2] + m[ 3]; d[ 3] = m[ 3] - m[ 2]; d[ 4] = m[ 4] + m[ 5]; d[ 5] = m[ 4] - m[ 5]; d[ 6] = m[ 6] + m[ 7]; d[ 7] = m[ 7] - m[ 6]; d[ 8] = m[ 8] + m[ 9]; d[ 9] = m[ 8] - m[ 9]; d[10] = m[10] + m[11]; d[11] = m[11] - m[10]; d[12] = m[12] + m[13]; d[13] = m[12] - m[13]; d[14] = m[14] + m[15]; d[15] = m[15] - m[14]; for (k=0; k<16; ++k) { satd += abs(d[k]); } satd = ((satd+1)>>1); return satd; } Distortion RdCost::xCalcHADs8x8( const Pel *piOrg, const Pel *piCur, int iStrideOrg, int iStrideCur, int iStep ) { int k, i, j, jj; Distortion sad = 0; TCoeff diff[64], m1[8][8], m2[8][8], m3[8][8]; CHECK( iStep != 1, "Invalid step" ); for( k = 0; k < 64; k += 8 ) { diff[k+0] = piOrg[0] - piCur[0]; diff[k+1] = piOrg[1] - piCur[1]; diff[k+2] = piOrg[2] - piCur[2]; diff[k+3] = piOrg[3] - piCur[3]; diff[k+4] = piOrg[4] - piCur[4]; diff[k+5] = piOrg[5] - piCur[5]; diff[k+6] = piOrg[6] - piCur[6]; diff[k+7] = piOrg[7] - piCur[7]; piCur += iStrideCur; piOrg += iStrideOrg; } //horizontal for (j=0; j < 8; j++) { jj = j << 3; m2[j][0] = diff[jj ] + diff[jj+4]; m2[j][1] = diff[jj+1] + diff[jj+5]; m2[j][2] = diff[jj+2] + diff[jj+6]; m2[j][3] = diff[jj+3] + diff[jj+7]; m2[j][4] = diff[jj ] - diff[jj+4]; 
m2[j][5] = diff[jj+1] - diff[jj+5]; m2[j][6] = diff[jj+2] - diff[jj+6]; m2[j][7] = diff[jj+3] - diff[jj+7]; m1[j][0] = m2[j][0] + m2[j][2]; m1[j][1] = m2[j][1] + m2[j][3]; m1[j][2] = m2[j][0] - m2[j][2]; m1[j][3] = m2[j][1] - m2[j][3]; m1[j][4] = m2[j][4] + m2[j][6]; m1[j][5] = m2[j][5] + m2[j][7]; m1[j][6] = m2[j][4] - m2[j][6]; m1[j][7] = m2[j][5] - m2[j][7]; m2[j][0] = m1[j][0] + m1[j][1]; m2[j][1] = m1[j][0] - m1[j][1]; m2[j][2] = m1[j][2] + m1[j][3]; m2[j][3] = m1[j][2] - m1[j][3]; m2[j][4] = m1[j][4] + m1[j][5]; m2[j][5] = m1[j][4] - m1[j][5]; m2[j][6] = m1[j][6] + m1[j][7]; m2[j][7] = m1[j][6] - m1[j][7]; } //vertical for (i=0; i < 8; i++) { m3[0][i] = m2[0][i] + m2[4][i]; m3[1][i] = m2[1][i] + m2[5][i]; m3[2][i] = m2[2][i] + m2[6][i]; m3[3][i] = m2[3][i] + m2[7][i]; m3[4][i] = m2[0][i] - m2[4][i]; m3[5][i] = m2[1][i] - m2[5][i]; m3[6][i] = m2[2][i] - m2[6][i]; m3[7][i] = m2[3][i] - m2[7][i]; m1[0][i] = m3[0][i] + m3[2][i]; m1[1][i] = m3[1][i] + m3[3][i]; m1[2][i] = m3[0][i] - m3[2][i]; m1[3][i] = m3[1][i] - m3[3][i]; m1[4][i] = m3[4][i] + m3[6][i]; m1[5][i] = m3[5][i] + m3[7][i]; m1[6][i] = m3[4][i] - m3[6][i]; m1[7][i] = m3[5][i] - m3[7][i]; m2[0][i] = m1[0][i] + m1[1][i]; m2[1][i] = m1[0][i] - m1[1][i]; m2[2][i] = m1[2][i] + m1[3][i]; m2[3][i] = m1[2][i] - m1[3][i]; m2[4][i] = m1[4][i] + m1[5][i]; m2[5][i] = m1[4][i] - m1[5][i]; m2[6][i] = m1[6][i] + m1[7][i]; m2[7][i] = m1[6][i] - m1[7][i]; } for (i = 0; i < 8; i++) { for (j = 0; j < 8; j++) { sad += abs(m2[i][j]); } } sad=((sad+2)>>2); return sad; } Distortion RdCost::xCalcHADs16x8( const Pel *piOrg, const Pel *piCur, int iStrideOrg, int iStrideCur ) { //need to add SIMD implementation ,JCA int k, i, j, jj, sad = 0; int diff[128], m1[8][16], m2[8][16]; for( k = 0; k < 128; k += 16 ) { diff[k + 0] = piOrg[0] - piCur[0]; diff[k + 1] = piOrg[1] - piCur[1]; diff[k + 2] = piOrg[2] - piCur[2]; diff[k + 3] = piOrg[3] - piCur[3]; diff[k + 4] = piOrg[4] - piCur[4]; diff[k + 5] = piOrg[5] - piCur[5]; diff[k + 6] 
= piOrg[6] - piCur[6]; diff[k + 7] = piOrg[7] - piCur[7]; diff[k + 8] = piOrg[8] - piCur[8]; diff[k + 9] = piOrg[9] - piCur[9]; diff[k + 10] = piOrg[10] - piCur[10]; diff[k + 11] = piOrg[11] - piCur[11]; diff[k + 12] = piOrg[12] - piCur[12]; diff[k + 13] = piOrg[13] - piCur[13]; diff[k + 14] = piOrg[14] - piCur[14]; diff[k + 15] = piOrg[15] - piCur[15]; piCur += iStrideCur; piOrg += iStrideOrg; } //horizontal for( j = 0; j < 8; j++ ) { jj = j << 4; m2[j][0] = diff[jj ] + diff[jj + 8]; m2[j][1] = diff[jj + 1] + diff[jj + 9]; m2[j][2] = diff[jj + 2] + diff[jj + 10]; m2[j][3] = diff[jj + 3] + diff[jj + 11]; m2[j][4] = diff[jj + 4] + diff[jj + 12]; m2[j][5] = diff[jj + 5] + diff[jj + 13]; m2[j][6] = diff[jj + 6] + diff[jj + 14]; m2[j][7] = diff[jj + 7] + diff[jj + 15]; m2[j][8] = diff[jj ] - diff[jj + 8]; m2[j][9] = diff[jj + 1] - diff[jj + 9]; m2[j][10] = diff[jj + 2] - diff[jj + 10]; m2[j][11] = diff[jj + 3] - diff[jj + 11]; m2[j][12] = diff[jj + 4] - diff[jj + 12]; m2[j][13] = diff[jj + 5] - diff[jj + 13]; m2[j][14] = diff[jj + 6] - diff[jj + 14]; m2[j][15] = diff[jj + 7] - diff[jj + 15]; m1[j][0] = m2[j][0] + m2[j][4]; m1[j][1] = m2[j][1] + m2[j][5]; m1[j][2] = m2[j][2] + m2[j][6]; m1[j][3] = m2[j][3] + m2[j][7]; m1[j][4] = m2[j][0] - m2[j][4]; m1[j][5] = m2[j][1] - m2[j][5]; m1[j][6] = m2[j][2] - m2[j][6]; m1[j][7] = m2[j][3] - m2[j][7]; m1[j][8] = m2[j][8] + m2[j][12]; m1[j][9] = m2[j][9] + m2[j][13]; m1[j][10] = m2[j][10] + m2[j][14]; m1[j][11] = m2[j][11] + m2[j][15]; m1[j][12] = m2[j][8] - m2[j][12]; m1[j][13] = m2[j][9] - m2[j][13]; m1[j][14] = m2[j][10] - m2[j][14]; m1[j][15] = m2[j][11] - m2[j][15]; m2[j][0] = m1[j][0] + m1[j][2]; m2[j][1] = m1[j][1] + m1[j][3]; m2[j][2] = m1[j][0] - m1[j][2]; m2[j][3] = m1[j][1] - m1[j][3]; m2[j][4] = m1[j][4] + m1[j][6]; m2[j][5] = m1[j][5] + m1[j][7]; m2[j][6] = m1[j][4] - m1[j][6]; m2[j][7] = m1[j][5] - m1[j][7]; m2[j][8] = m1[j][8] + m1[j][10]; m2[j][9] = m1[j][9] + m1[j][11]; m2[j][10] = m1[j][8] - m1[j][10]; 
m2[j][11] = m1[j][9] - m1[j][11]; m2[j][12] = m1[j][12] + m1[j][14]; m2[j][13] = m1[j][13] + m1[j][15]; m2[j][14] = m1[j][12] - m1[j][14]; m2[j][15] = m1[j][13] - m1[j][15]; m1[j][0] = m2[j][0] + m2[j][1]; m1[j][1] = m2[j][0] - m2[j][1]; m1[j][2] = m2[j][2] + m2[j][3]; m1[j][3] = m2[j][2] - m2[j][3]; m1[j][4] = m2[j][4] + m2[j][5]; m1[j][5] = m2[j][4] - m2[j][5]; m1[j][6] = m2[j][6] + m2[j][7]; m1[j][7] = m2[j][6] - m2[j][7]; m1[j][8] = m2[j][8] + m2[j][9]; m1[j][9] = m2[j][8] - m2[j][9]; m1[j][10] = m2[j][10] + m2[j][11]; m1[j][11] = m2[j][10] - m2[j][11]; m1[j][12] = m2[j][12] + m2[j][13]; m1[j][13] = m2[j][12] - m2[j][13]; m1[j][14] = m2[j][14] + m2[j][15]; m1[j][15] = m2[j][14] - m2[j][15]; } //vertical for( i = 0; i < 16; i++ ) { m2[0][i] = m1[0][i] + m1[4][i]; m2[1][i] = m1[1][i] + m1[5][i]; m2[2][i] = m1[2][i] + m1[6][i]; m2[3][i] = m1[3][i] + m1[7][i]; m2[4][i] = m1[0][i] - m1[4][i]; m2[5][i] = m1[1][i] - m1[5][i]; m2[6][i] = m1[2][i] - m1[6][i]; m2[7][i] = m1[3][i] - m1[7][i]; m1[0][i] = m2[0][i] + m2[2][i]; m1[1][i] = m2[1][i] + m2[3][i]; m1[2][i] = m2[0][i] - m2[2][i]; m1[3][i] = m2[1][i] - m2[3][i]; m1[4][i] = m2[4][i] + m2[6][i]; m1[5][i] = m2[5][i] + m2[7][i]; m1[6][i] = m2[4][i] - m2[6][i]; m1[7][i] = m2[5][i] - m2[7][i]; m2[0][i] = m1[0][i] + m1[1][i]; m2[1][i] = m1[0][i] - m1[1][i]; m2[2][i] = m1[2][i] + m1[3][i]; m2[3][i] = m1[2][i] - m1[3][i]; m2[4][i] = m1[4][i] + m1[5][i]; m2[5][i] = m1[4][i] - m1[5][i]; m2[6][i] = m1[6][i] + m1[7][i]; m2[7][i] = m1[6][i] - m1[7][i]; } for( i = 0; i < 8; i++ ) { for( j = 0; j < 16; j++ ) { sad += abs( m2[i][j] ); } } sad = ( int ) ( sad / sqrt( 16.0 * 8 ) * 2 ); return sad; } Distortion RdCost::xCalcHADs8x16( const Pel *piOrg, const Pel *piCur, int iStrideOrg, int iStrideCur ) { int k, i, j, jj, sad = 0; int diff[128], m1[16][8], m2[16][8]; for( k = 0; k < 128; k += 8 ) { diff[k + 0] = piOrg[0] - piCur[0]; diff[k + 1] = piOrg[1] - piCur[1]; diff[k + 2] = piOrg[2] - piCur[2]; diff[k + 3] = piOrg[3] - piCur[3]; 
diff[k + 4] = piOrg[4] - piCur[4]; diff[k + 5] = piOrg[5] - piCur[5]; diff[k + 6] = piOrg[6] - piCur[6]; diff[k + 7] = piOrg[7] - piCur[7]; piCur += iStrideCur; piOrg += iStrideOrg; } //horizontal for( j = 0; j < 16; j++ ) { jj = j << 3; m2[j][0] = diff[jj] + diff[jj + 4]; m2[j][1] = diff[jj + 1] + diff[jj + 5]; m2[j][2] = diff[jj + 2] + diff[jj + 6]; m2[j][3] = diff[jj + 3] + diff[jj + 7]; m2[j][4] = diff[jj] - diff[jj + 4]; m2[j][5] = diff[jj + 1] - diff[jj + 5]; m2[j][6] = diff[jj + 2] - diff[jj + 6]; m2[j][7] = diff[jj + 3] - diff[jj + 7]; m1[j][0] = m2[j][0] + m2[j][2]; m1[j][1] = m2[j][1] + m2[j][3]; m1[j][2] = m2[j][0] - m2[j][2]; m1[j][3] = m2[j][1] - m2[j][3]; m1[j][4] = m2[j][4] + m2[j][6]; m1[j][5] = m2[j][5] + m2[j][7]; m1[j][6] = m2[j][4] - m2[j][6]; m1[j][7] = m2[j][5] - m2[j][7]; m2[j][0] = m1[j][0] + m1[j][1]; m2[j][1] = m1[j][0] - m1[j][1]; m2[j][2] = m1[j][2] + m1[j][3]; m2[j][3] = m1[j][2] - m1[j][3]; m2[j][4] = m1[j][4] + m1[j][5]; m2[j][5] = m1[j][4] - m1[j][5]; m2[j][6] = m1[j][6] + m1[j][7]; m2[j][7] = m1[j][6] - m1[j][7]; } //vertical for( i = 0; i < 8; i++ ) { m1[0][i] = m2[0][i] + m2[8][i]; m1[1][i] = m2[1][i] + m2[9][i]; m1[2][i] = m2[2][i] + m2[10][i]; m1[3][i] = m2[3][i] + m2[11][i]; m1[4][i] = m2[4][i] + m2[12][i]; m1[5][i] = m2[5][i] + m2[13][i]; m1[6][i] = m2[6][i] + m2[14][i]; m1[7][i] = m2[7][i] + m2[15][i]; m1[8][i] = m2[0][i] - m2[8][i]; m1[9][i] = m2[1][i] - m2[9][i]; m1[10][i] = m2[2][i] - m2[10][i]; m1[11][i] = m2[3][i] - m2[11][i]; m1[12][i] = m2[4][i] - m2[12][i]; m1[13][i] = m2[5][i] - m2[13][i]; m1[14][i] = m2[6][i] - m2[14][i]; m1[15][i] = m2[7][i] - m2[15][i]; m2[0][i] = m1[0][i] + m1[4][i]; m2[1][i] = m1[1][i] + m1[5][i]; m2[2][i] = m1[2][i] + m1[6][i]; m2[3][i] = m1[3][i] + m1[7][i]; m2[4][i] = m1[0][i] - m1[4][i]; m2[5][i] = m1[1][i] - m1[5][i]; m2[6][i] = m1[2][i] - m1[6][i]; m2[7][i] = m1[3][i] - m1[7][i]; m2[8][i] = m1[8][i] + m1[12][i]; m2[9][i] = m1[9][i] + m1[13][i]; m2[10][i] = m1[10][i] + m1[14][i]; m2[11][i] 
= m1[11][i] + m1[15][i]; m2[12][i] = m1[8][i] - m1[12][i]; m2[13][i] = m1[9][i] - m1[13][i]; m2[14][i] = m1[10][i] - m1[14][i]; m2[15][i] = m1[11][i] - m1[15][i]; m1[0][i] = m2[0][i] + m2[2][i]; m1[1][i] = m2[1][i] + m2[3][i]; m1[2][i] = m2[0][i] - m2[2][i]; m1[3][i] = m2[1][i] - m2[3][i]; m1[4][i] = m2[4][i] + m2[6][i]; m1[5][i] = m2[5][i] + m2[7][i]; m1[6][i] = m2[4][i] - m2[6][i]; m1[7][i] = m2[5][i] - m2[7][i]; m1[8][i] = m2[8][i] + m2[10][i]; m1[9][i] = m2[9][i] + m2[11][i]; m1[10][i] = m2[8][i] - m2[10][i]; m1[11][i] = m2[9][i] - m2[11][i]; m1[12][i] = m2[12][i] + m2[14][i]; m1[13][i] = m2[13][i] + m2[15][i]; m1[14][i] = m2[12][i] - m2[14][i]; m1[15][i] = m2[13][i] - m2[15][i]; m2[0][i] = m1[0][i] + m1[1][i]; m2[1][i] = m1[0][i] - m1[1][i]; m2[2][i] = m1[2][i] + m1[3][i]; m2[3][i] = m1[2][i] - m1[3][i]; m2[4][i] = m1[4][i] + m1[5][i]; m2[5][i] = m1[4][i] - m1[5][i]; m2[6][i] = m1[6][i] + m1[7][i]; m2[7][i] = m1[6][i] - m1[7][i]; m2[8][i] = m1[8][i] + m1[9][i]; m2[9][i] = m1[8][i] - m1[9][i]; m2[10][i] = m1[10][i] + m1[11][i]; m2[11][i] = m1[10][i] - m1[11][i]; m2[12][i] = m1[12][i] + m1[13][i]; m2[13][i] = m1[12][i] - m1[13][i]; m2[14][i] = m1[14][i] + m1[15][i]; m2[15][i] = m1[14][i] - m1[15][i]; } for( i = 0; i < 16; i++ ) { for( j = 0; j < 8; j++ ) { sad += abs( m2[i][j] ); } } sad = ( int ) ( sad / sqrt( 16.0 * 8 ) * 2 ); return sad; } Distortion RdCost::xCalcHADs4x8( const Pel *piOrg, const Pel *piCur, int iStrideOrg, int iStrideCur ) { int k, i, j, jj, sad = 0; int diff[32], m1[8][4], m2[8][4]; for( k = 0; k < 32; k += 4 ) { diff[k + 0] = piOrg[0] - piCur[0]; diff[k + 1] = piOrg[1] - piCur[1]; diff[k + 2] = piOrg[2] - piCur[2]; diff[k + 3] = piOrg[3] - piCur[3]; piCur += iStrideCur; piOrg += iStrideOrg; } //horizontal for( j = 0; j < 8; j++ ) { jj = j << 2; m2[j][0] = diff[jj] + diff[jj + 2]; m2[j][1] = diff[jj + 1] + diff[jj + 3]; m2[j][2] = diff[jj] - diff[jj + 2]; m2[j][3] = diff[jj + 1] - diff[jj + 3]; m1[j][0] = m2[j][0] + m2[j][1]; m1[j][1] = 
m2[j][0] - m2[j][1]; m1[j][2] = m2[j][2] + m2[j][3]; m1[j][3] = m2[j][2] - m2[j][3]; } //vertical for( i = 0; i < 4; i++ ) { m2[0][i] = m1[0][i] + m1[4][i]; m2[1][i] = m1[1][i] + m1[5][i]; m2[2][i] = m1[2][i] + m1[6][i]; m2[3][i] = m1[3][i] + m1[7][i]; m2[4][i] = m1[0][i] - m1[4][i]; m2[5][i] = m1[1][i] - m1[5][i]; m2[6][i] = m1[2][i] - m1[6][i]; m2[7][i] = m1[3][i] - m1[7][i]; m1[0][i] = m2[0][i] + m2[2][i]; m1[1][i] = m2[1][i] + m2[3][i]; m1[2][i] = m2[0][i] - m2[2][i]; m1[3][i] = m2[1][i] - m2[3][i]; m1[4][i] = m2[4][i] + m2[6][i]; m1[5][i] = m2[5][i] + m2[7][i]; m1[6][i] = m2[4][i] - m2[6][i]; m1[7][i] = m2[5][i] - m2[7][i]; m2[0][i] = m1[0][i] + m1[1][i]; m2[1][i] = m1[0][i] - m1[1][i]; m2[2][i] = m1[2][i] + m1[3][i]; m2[3][i] = m1[2][i] - m1[3][i]; m2[4][i] = m1[4][i] + m1[5][i]; m2[5][i] = m1[4][i] - m1[5][i]; m2[6][i] = m1[6][i] + m1[7][i]; m2[7][i] = m1[6][i] - m1[7][i]; } for( i = 0; i < 8; i++ ) { for( j = 0; j < 4; j++ ) { sad += abs( m2[i][j] ); } } sad = ( int ) ( sad / sqrt( 4.0 * 8 ) * 2 ); return sad; } Distortion RdCost::xCalcHADs8x4( const Pel *piOrg, const Pel *piCur, int iStrideOrg, int iStrideCur ) { int k, i, j, jj, sad = 0; int diff[32], m1[4][8], m2[4][8]; for( k = 0; k < 32; k += 8 ) { diff[k + 0] = piOrg[0] - piCur[0]; diff[k + 1] = piOrg[1] - piCur[1]; diff[k + 2] = piOrg[2] - piCur[2]; diff[k + 3] = piOrg[3] - piCur[3]; diff[k + 4] = piOrg[4] - piCur[4]; diff[k + 5] = piOrg[5] - piCur[5]; diff[k + 6] = piOrg[6] - piCur[6]; diff[k + 7] = piOrg[7] - piCur[7]; piCur += iStrideCur; piOrg += iStrideOrg; } //horizontal for( j = 0; j < 4; j++ ) { jj = j << 3; m2[j][0] = diff[jj] + diff[jj + 4]; m2[j][1] = diff[jj + 1] + diff[jj + 5]; m2[j][2] = diff[jj + 2] + diff[jj + 6]; m2[j][3] = diff[jj + 3] + diff[jj + 7]; m2[j][4] = diff[jj] - diff[jj + 4]; m2[j][5] = diff[jj + 1] - diff[jj + 5]; m2[j][6] = diff[jj + 2] - diff[jj + 6]; m2[j][7] = diff[jj + 3] - diff[jj + 7]; m1[j][0] = m2[j][0] + m2[j][2]; m1[j][1] = m2[j][1] + m2[j][3]; m1[j][2] = 
m2[j][0] - m2[j][2]; m1[j][3] = m2[j][1] - m2[j][3]; m1[j][4] = m2[j][4] + m2[j][6]; m1[j][5] = m2[j][5] + m2[j][7]; m1[j][6] = m2[j][4] - m2[j][6]; m1[j][7] = m2[j][5] - m2[j][7]; m2[j][0] = m1[j][0] + m1[j][1]; m2[j][1] = m1[j][0] - m1[j][1]; m2[j][2] = m1[j][2] + m1[j][3]; m2[j][3] = m1[j][2] - m1[j][3]; m2[j][4] = m1[j][4] + m1[j][5]; m2[j][5] = m1[j][4] - m1[j][5]; m2[j][6] = m1[j][6] + m1[j][7]; m2[j][7] = m1[j][6] - m1[j][7]; } //vertical for( i = 0; i < 8; i++ ) { m1[0][i] = m2[0][i] + m2[2][i]; m1[1][i] = m2[1][i] + m2[3][i]; m1[2][i] = m2[0][i] - m2[2][i]; m1[3][i] = m2[1][i] - m2[3][i]; m2[0][i] = m1[0][i] + m1[1][i]; m2[1][i] = m1[0][i] - m1[1][i]; m2[2][i] = m1[2][i] + m1[3][i]; m2[3][i] = m1[2][i] - m1[3][i]; } for( i = 0; i < 4; i++ ) { for( j = 0; j < 8; j++ ) { sad += abs( m2[i][j] ); } } sad = ( int ) ( sad / sqrt( 4.0 * 8 ) * 2 ); return sad; } Distortion RdCost::xGetHADs( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetHADsw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const int iRows = rcDtParam.org.height; const int iCols = rcDtParam.org.width; const int iStrideCur = rcDtParam.cur.stride; const int iStrideOrg = rcDtParam.org.stride; const int iStep = rcDtParam.step; int x = 0, y = 0; Distortion uiSum = 0; if( iCols > iRows && ( iRows & 7 ) == 0 && ( iCols & 15 ) == 0 ) { for( y = 0; y < iRows; y += 8 ) { for( x = 0; x < iCols; x += 16 ) { uiSum += xCalcHADs16x8( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur ); } piOrg += iStrideOrg * 8; piCur += iStrideCur * 8; } } else if( iCols < iRows && ( iCols & 7 ) == 0 && ( iRows & 15 ) == 0 ) { for( y = 0; y < iRows; y += 16 ) { for( x = 0; x < iCols; x += 8 ) { uiSum += xCalcHADs8x16( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur ); } piOrg += iStrideOrg * 16; piCur += iStrideCur * 16; } } else if( iCols > iRows && ( iRows & 3 ) == 0 && ( iCols & 7 ) == 0 ) { for( y = 0; y < iRows; y += 4 ) { for( x = 0; x < 
iCols; x += 8 ) { uiSum += xCalcHADs8x4( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur ); } piOrg += iStrideOrg * 4; piCur += iStrideCur * 4; } } else if( iCols < iRows && ( iCols & 3 ) == 0 && ( iRows & 7 ) == 0 ) { for( y = 0; y < iRows; y += 8 ) { for( x = 0; x < iCols; x += 4 ) { uiSum += xCalcHADs4x8( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur ); } piOrg += iStrideOrg * 8; piCur += iStrideCur * 8; } } else if( ( iRows % 8 == 0 ) && ( iCols % 8 == 0 ) ) { int iOffsetOrg = iStrideOrg << 3; int iOffsetCur = iStrideCur << 3; for( y = 0; y < iRows; y += 8 ) { for( x = 0; x < iCols; x += 8 ) { uiSum += xCalcHADs8x8( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep ); } piOrg += iOffsetOrg; piCur += iOffsetCur; } } else if( ( iRows % 4 == 0 ) && ( iCols % 4 == 0 ) ) { int iOffsetOrg = iStrideOrg << 2; int iOffsetCur = iStrideCur << 2; for( y = 0; y < iRows; y += 4 ) { for( x = 0; x < iCols; x += 4 ) { uiSum += xCalcHADs4x4( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep ); } piOrg += iOffsetOrg; piCur += iOffsetCur; } } else if( ( iRows % 2 == 0 ) && ( iCols % 2 == 0 ) ) { int iOffsetOrg = iStrideOrg << 1; int iOffsetCur = iStrideCur << 1; for( y = 0; y < iRows; y += 2 ) { for( x = 0; x < iCols; x += 2 ) { uiSum += xCalcHADs2x2( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep ); } piOrg += iOffsetOrg; piCur += iOffsetCur; } } else { THROW( "Invalid size" ); } return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } #if WCG_EXT uint32_t RdCost::m_signalType = RESHAPE_SIGNAL_NULL; double RdCost::m_chromaWeight = 1.0; int RdCost::m_lumaBD = 10; std::vector<double> RdCost::m_reshapeLumaLevelToWeightPLUT; std::vector<double> RdCost::m_lumaLevelToWeightPLUT; void RdCost::saveUnadjustedLambda() { m_dLambda_unadjusted = m_dLambda; m_DistScaleUnadjusted = m_DistScale; } void RdCost::initLumaLevelToWeightTable() { for (int i = 0; i < LUMA_LEVEL_TO_DQP_LUT_MAXSIZE; i++) { double x = i; double y; /* //always false if (isSDR) // set 
SDR weight table { y = 0.03*x - 3.0; // this is the Equation used to derive the luma qp LUT for SDR in ST-2084 y = y<0 ? 0 : (y>12 ? 12 : y); } else */ { // set SDR weight table y = 0.015*x - 1.5 - 6; // this is the Equation used to derive the luma qp LUT for HDR in MPEG HDR anchor3.2 (JCTCX-X1020) y = y<-3 ? -3 : (y>6 ? 6 : y); } m_lumaLevelToWeightPLUT[i] = pow(2.0, y / 3.0); // or power(10, dQp/10) they are almost equal } } void RdCost::initLumaLevelToWeightTableReshape() { int lutSize = 1 << m_lumaBD; if (m_reshapeLumaLevelToWeightPLUT.empty()) m_reshapeLumaLevelToWeightPLUT.resize(lutSize, 1.0); if (m_lumaLevelToWeightPLUT.empty()) m_lumaLevelToWeightPLUT.resize(lutSize, 1.0); if (m_signalType == RESHAPE_SIGNAL_PQ) { for (int i = 0; i < (1 << m_lumaBD); i++) { double x = m_lumaBD < 10 ? i << (10 - m_lumaBD) : m_lumaBD > 10 ? i >> (m_lumaBD - 10) : i; double y; y = 0.015*x - 1.5 - 6; y = y < -3 ? -3 : (y > 6 ? 6 : y); m_reshapeLumaLevelToWeightPLUT[i] = pow(2.0, y / 3.0); m_lumaLevelToWeightPLUT[i] = m_reshapeLumaLevelToWeightPLUT[i]; } } } void RdCost::updateReshapeLumaLevelToWeightTableChromaMD(std::vector<Pel>& ILUT) { for (int i = 0; i < (1 << m_lumaBD); i++) { m_reshapeLumaLevelToWeightPLUT[i] = m_lumaLevelToWeightPLUT[ILUT[i]]; } } void RdCost::restoreReshapeLumaLevelToWeightTable() { for (int i = 0; i < (1 << m_lumaBD); i++) { m_reshapeLumaLevelToWeightPLUT.at(i) = m_lumaLevelToWeightPLUT.at(i); } } void RdCost::updateReshapeLumaLevelToWeightTable(SliceReshapeInfo &sliceReshape, Pel *wtTable, double cwt) { if (m_signalType == RESHAPE_SIGNAL_SDR) { if (sliceReshape.getSliceReshapeModelPresentFlag()) { double wBin = 1.0; double weight = 1.0; int histLens = (1 << m_lumaBD) / PIC_CODE_CW_BINS; for (int i = 0; i < PIC_CODE_CW_BINS; i++) { if ((i < sliceReshape.reshaperModelMinBinIdx) || (i > sliceReshape.reshaperModelMaxBinIdx)) weight = 1.0; else { if (sliceReshape.reshaperModelBinCWDelta[i] == 1 || (sliceReshape.reshaperModelBinCWDelta[i] == -1 * histLens)) 
weight = wBin; else { weight = (double)wtTable[i] / (double)histLens; weight = weight*weight; } } for (int j = 0; j < histLens; j++) { int ii = i*histLens + j; m_reshapeLumaLevelToWeightPLUT[ii] = weight; } } m_chromaWeight = cwt; } else { THROW("updateReshapeLumaLevelToWeightTable ERROR!!"); } } else { THROW("updateReshapeLumaLevelToWeightTable not support other signal types!!"); } } Distortion RdCost::getWeightedMSE(int compIdx, const Pel org, const Pel cur, const uint32_t uiShift, const Pel orgLuma) { Distortion distortionVal = 0; Intermediate_Int iTemp = org - cur; CHECK( org<0, ""); if (compIdx == COMPONENT_Y) { CHECK(org!=orgLuma, ""); } // use luma to get weight double weight = 1.0; if (m_signalType == RESHAPE_SIGNAL_SDR) { if (compIdx == COMPONENT_Y) { weight = m_reshapeLumaLevelToWeightPLUT[orgLuma]; } else { weight = m_chromaWeight; } } else { weight = m_reshapeLumaLevelToWeightPLUT[orgLuma]; } int64_t fixedPTweight = (int64_t)(weight * (double)(1 << 16)); Intermediate_Int mse = Intermediate_Int((fixedPTweight*(iTemp*iTemp) + (1 << 15)) >> 16); distortionVal = Distortion( mse >> uiShift); return distortionVal; } Distortion RdCost::xGetSSE_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSSEw( rcDtParam ); // ignore it for now } int iRows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const int iCols = rcDtParam.org.width; const int iStrideCur = rcDtParam.cur.stride; const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const int iStrideOrgLuma = rcDtParam.orgLuma.stride; #if JVET_N0671_RDCOST_FIX const int cShift = 0; #else const int cShift = (rcDtParam.compID==COMPONENT_Y) ? 
0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY #endif Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for( ; iRows != 0; iRows-- ) { for (int n = 0; n < iCols; n++ ) { uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n ], piCur[n ], uiShift, piOrgLuma[n<<cShift]); } piOrg += iStrideOrg; piCur += iStrideCur; piOrgLuma += iStrideOrgLuma<<cShift; } return ( uiSum ); } Distortion RdCost::xGetSSE2_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 2, "" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); // ignore it for now } int iRows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const int iStrideCur = rcDtParam.cur.stride; const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t iStrideOrgLuma = rcDtParam.orgLuma.stride; #if JVET_N0671_RDCOST_FIX const size_t cShift = 0; #else const size_t cShift = (rcDtParam.compID==COMPONENT_Y) ? 
0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY #endif Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for( ; iRows != 0; iRows-- ) { uiSum += getWeightedMSE(rcDtParam.compID, piOrg[0 ], piCur[0 ], uiShift, piOrgLuma[size_t(0)<<cShift]); // piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[1 ], piCur[1 ], uiShift, piOrgLuma[size_t(1)<<cShift]); // piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; piOrgLuma += iStrideOrgLuma<<cShift; } return ( uiSum ); } Distortion RdCost::xGetSSE4_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 4, "" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); // ignore it for now } int iRows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const int iStrideCur = rcDtParam.cur.stride; const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t iStrideOrgLuma = rcDtParam.orgLuma.stride; #if JVET_N0671_RDCOST_FIX const size_t cShift = 0; #else const size_t cShift = (rcDtParam.compID==COMPONENT_Y) ? 
0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY #endif Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for( ; iRows != 0; iRows-- ) { uiSum += getWeightedMSE(rcDtParam.compID, piOrg[0 ], piCur[0 ], uiShift, piOrgLuma[size_t(0)<<cShift]); // piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[1 ], piCur[1 ], uiShift, piOrgLuma[size_t(1)<<cShift] ); // piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[2 ], piCur[2 ], uiShift, piOrgLuma[size_t(2)<<cShift] ); // piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[3 ], piCur[3 ], uiShift, piOrgLuma[size_t(3)<<cShift] ); // piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; piOrgLuma += iStrideOrgLuma<<cShift; } return ( uiSum ); } Distortion RdCost::xGetSSE8_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 8, "" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } int iRows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const int iStrideCur = rcDtParam.cur.stride; const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t iStrideOrgLuma = rcDtParam.orgLuma.stride; #if JVET_N0671_RDCOST_FIX const size_t cShift = 0; #else const size_t cShift = (rcDtParam.compID==COMPONENT_Y) ? 
0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY #endif Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for( ; iRows != 0; iRows-- ) { uiSum += getWeightedMSE(rcDtParam.compID, piOrg[0 ], piCur[0 ], uiShift, piOrgLuma[0 ]); // piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[1 ], piCur[1 ], uiShift, piOrgLuma[size_t(1)<<cShift ]); // piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[2 ], piCur[2 ], uiShift, piOrgLuma[size_t(2)<<cShift ]); //piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[3 ], piCur[3 ], uiShift, piOrgLuma[size_t(3)<<cShift ]); // piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[4 ], piCur[4 ], uiShift, piOrgLuma[size_t(4)<<cShift ]); // piOrg[4] - piCur[4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[5 ], piCur[5 ], uiShift, piOrgLuma[size_t(5)<<cShift ]); // piOrg[5] - piCur[5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[6 ], piCur[6 ], uiShift, piOrgLuma[size_t(6)<<cShift ]); // piOrg[6] - piCur[6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[7 ], piCur[7 ], uiShift, piOrgLuma[size_t(7)<<cShift ]); // piOrg[7] - piCur[7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; piOrgLuma += iStrideOrgLuma<<cShift; } return ( uiSum ); } Distortion RdCost::xGetSSE16_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 16, "" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } int iRows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; 
const Pel* piCur = rcDtParam.cur.buf; const int iStrideCur = rcDtParam.cur.stride; const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t iStrideOrgLuma = rcDtParam.orgLuma.stride; #if JVET_N0671_RDCOST_FIX const size_t cShift = 0; #else const size_t cShift = (rcDtParam.compID==COMPONENT_Y) ? 0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY #endif Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for( ; iRows != 0; iRows-- ) { uiSum += getWeightedMSE(rcDtParam.compID, piOrg[0 ], piCur[0 ], uiShift, piOrgLuma[0 ]); // piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[1 ], piCur[1 ], uiShift, piOrgLuma[size_t(1)<<cShift ]); //piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[2 ], piCur[2 ], uiShift, piOrgLuma[size_t(2)<<cShift ]); //piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[3 ], piCur[3 ], uiShift, piOrgLuma[size_t(3)<<cShift ]); //piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[4 ], piCur[4 ], uiShift, piOrgLuma[size_t(4)<<cShift ]); //piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[5 ], piCur[5 ], uiShift, piOrgLuma[size_t(5)<<cShift ]); //piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[6 ], piCur[6 ], uiShift, piOrgLuma[size_t(6)<<cShift ]); //piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[7 ], piCur[7 ], uiShift, piOrgLuma[size_t(7)<<cShift ]); //piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += 
getWeightedMSE(rcDtParam.compID, piOrg[8 ], piCur[8 ], uiShift, piOrgLuma[size_t(8)<<cShift ]); //piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[9 ], piCur[9 ], uiShift, piOrgLuma[size_t(9)<<cShift ]); //piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[10 ], piCur[10 ], uiShift, piOrgLuma[size_t(10)<<cShift ]); //piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[11 ], piCur[11 ], uiShift, piOrgLuma[size_t(11)<<cShift ]); //piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[12 ], piCur[12 ], uiShift, piOrgLuma[size_t(12)<<cShift ]); //piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[13 ], piCur[13 ], uiShift, piOrgLuma[size_t(13)<<cShift ]); //piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[14 ], piCur[14 ], uiShift, piOrgLuma[size_t(14)<<cShift ]); //piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[15 ], piCur[15 ], uiShift, piOrgLuma[size_t(15)<<cShift ]); //piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; piOrgLuma += iStrideOrgLuma<<cShift; } return ( uiSum ); } Distortion RdCost::xGetSSE16N_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } int iRows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const int iCols = rcDtParam.org.width; const int iStrideCur = rcDtParam.cur.stride; const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; 
const size_t iStrideOrgLuma = rcDtParam.orgLuma.stride; #if JVET_N0671_RDCOST_FIX const size_t cShift = 0; #else const size_t cShift = (rcDtParam.compID==COMPONENT_Y) ? 0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY #endif Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for( ; iRows != 0; iRows-- ) { for (int n = 0; n < iCols; n+=16 ) { uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+0 ], piCur[n+0 ], uiShift, piOrgLuma[size_t(n+0)<<cShift ]); // iTemp = piOrg[n+ 0] - piCur[n+ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+1 ], piCur[n+1 ], uiShift, piOrgLuma[size_t(n+1)<<cShift ]); // iTemp = piOrg[n+ 1] - piCur[n+ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+2 ], piCur[n+2 ], uiShift, piOrgLuma[size_t(n+2)<<cShift ]); // iTemp = piOrg[n+ 2] - piCur[n+ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+3 ], piCur[n+3 ], uiShift, piOrgLuma[size_t(n+3)<<cShift ]); // iTemp = piOrg[n+ 3] - piCur[n+ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+4 ], piCur[n+4 ], uiShift, piOrgLuma[size_t(n+4)<<cShift ]); // iTemp = piOrg[n+ 4] - piCur[n+ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+5 ], piCur[n+5 ], uiShift, piOrgLuma[size_t(n+5)<<cShift ]); // iTemp = piOrg[n+ 5] - piCur[n+ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+6 ], piCur[n+6 ], uiShift, piOrgLuma[size_t(n+6)<<cShift ]); // iTemp = piOrg[n+ 6] - piCur[n+ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+7 ], piCur[n+7 ], uiShift, piOrgLuma[size_t(n+7)<<cShift ]); // iTemp = piOrg[n+ 7] - piCur[n+ 7]; uiSum += Distortion(( iTemp * iTemp 
) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+8 ], piCur[n+8 ], uiShift, piOrgLuma[size_t(n+8)<<cShift ]); // iTemp = piOrg[n+ 8] - piCur[n+ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+9 ], piCur[n+9 ], uiShift, piOrgLuma[size_t(n+9)<<cShift ]); // iTemp = piOrg[n+ 9] - piCur[n+ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+10], piCur[n+10], uiShift, piOrgLuma[size_t(n+10)<<cShift ]); // iTemp = piOrg[n+10] - piCur[n+10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+11], piCur[n+11], uiShift, piOrgLuma[size_t(n+11)<<cShift ]); // iTemp = piOrg[n+11] - piCur[n+11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+12], piCur[n+12], uiShift, piOrgLuma[size_t(n+12)<<cShift]); // iTemp = piOrg[n+12] - piCur[n+12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+13], piCur[n+13], uiShift, piOrgLuma[size_t(n+13)<<cShift ]); // iTemp = piOrg[n+13] - piCur[n+13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+14], piCur[n+14], uiShift, piOrgLuma[size_t(n+14)<<cShift ]); // iTemp = piOrg[n+14] - piCur[n+14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[n+15], piCur[n+15], uiShift, piOrgLuma[size_t(n+15)<<cShift ]); // iTemp = piOrg[n+15] - piCur[n+15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); } piOrg += iStrideOrg; piCur += iStrideCur; piOrgLuma += iStrideOrgLuma<<cShift; } return ( uiSum ); } Distortion RdCost::xGetSSE32_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 32, "" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } int iRows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; const Pel* 
piCur = rcDtParam.cur.buf; const int iStrideCur = rcDtParam.cur.stride; const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t iStrideOrgLuma = rcDtParam.orgLuma.stride; #if JVET_N0671_RDCOST_FIX const size_t cShift = 0; #else const size_t cShift = (rcDtParam.compID==COMPONENT_Y) ? 0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY #endif Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for( ; iRows != 0; iRows-- ) { uiSum += getWeightedMSE(rcDtParam.compID, piOrg[0 ], piCur[0 ], uiShift, piOrgLuma[size_t(0) ]); // iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[1 ], piCur[1 ], uiShift, piOrgLuma[size_t(1)<<cShift ]); // iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[2 ], piCur[2 ], uiShift, piOrgLuma[size_t(2)<<cShift ]); // iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[3 ], piCur[3 ], uiShift, piOrgLuma[size_t(3)<<cShift ]); // iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[4 ], piCur[4 ], uiShift, piOrgLuma[size_t(4)<<cShift ]); // iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[5 ], piCur[5 ], uiShift, piOrgLuma[size_t(5)<<cShift ]); // iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[6 ], piCur[6 ], uiShift, piOrgLuma[size_t(6)<<cShift ]); // iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[7 ], piCur[7 ], uiShift, piOrgLuma[size_t(7)<<cShift ]); // iTemp = piOrg[ 7] - piCur[ 
7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[8 ], piCur[8 ], uiShift, piOrgLuma[size_t(8)<<cShift ]); // iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[9 ], piCur[9 ], uiShift, piOrgLuma[size_t(9)<<cShift ]); // iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[10], piCur[10], uiShift, piOrgLuma[size_t(10)<<cShift ]); // iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[11], piCur[11], uiShift, piOrgLuma[size_t(11)<<cShift ]); // iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[12], piCur[12], uiShift, piOrgLuma[size_t(12)<<cShift ]); // iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[13], piCur[13], uiShift, piOrgLuma[size_t(13)<<cShift ]); // iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[14], piCur[14], uiShift, piOrgLuma[size_t(14)<<cShift ]); // iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[15], piCur[15], uiShift, piOrgLuma[size_t(15)<<cShift ]); // iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[16], piCur[16], uiShift, piOrgLuma[size_t(16)<<cShift ]); // iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[17], piCur[17], uiShift, piOrgLuma[size_t(17)<<cShift ]); // iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += 
getWeightedMSE(rcDtParam.compID, piOrg[18], piCur[18], uiShift, piOrgLuma[size_t(18)<<cShift ]); // iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[19], piCur[19], uiShift, piOrgLuma[size_t(19)<<cShift ]); // iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[20], piCur[20], uiShift, piOrgLuma[size_t(20)<<cShift ]); // iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[21], piCur[21], uiShift, piOrgLuma[size_t(21)<<cShift ]); // iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[22], piCur[22], uiShift, piOrgLuma[size_t(22)<<cShift ]); // iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[23], piCur[23], uiShift, piOrgLuma[size_t(23)<<cShift ]); // iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[24], piCur[24], uiShift, piOrgLuma[size_t(24)<<cShift ]); // iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[25], piCur[25], uiShift, piOrgLuma[size_t(25)<<cShift ]); // iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[26], piCur[26], uiShift, piOrgLuma[size_t(26)<<cShift ]); // iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[27], piCur[27], uiShift, piOrgLuma[size_t(27)<<cShift ]); // iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[28], piCur[28], uiShift, 
piOrgLuma[size_t(28)<<cShift ]); // iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[29], piCur[29], uiShift, piOrgLuma[size_t(29)<<cShift ]); // iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[30], piCur[30], uiShift, piOrgLuma[size_t(30)<<cShift ]); // iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[31], piCur[31], uiShift, piOrgLuma[size_t(31)<<cShift ]); // iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; piOrgLuma += iStrideOrgLuma<<cShift; } return ( uiSum ); } Distortion RdCost::xGetSSE64_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 64, "" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } int iRows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const int iStrideCur = rcDtParam.cur.stride; const int iStrideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t iStrideOrgLuma = rcDtParam.orgLuma.stride; #if JVET_N0671_RDCOST_FIX const size_t cShift = 0; #else const size_t cShift = (rcDtParam.compID==COMPONENT_Y) ? 
0 : 1; // assume 420, could use getComponentScaleX, getComponentScaleY #endif Distortion uiSum = 0; uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT((rcDtParam.bitDepth)) << 1; for( ; iRows != 0; iRows-- ) { uiSum += getWeightedMSE(rcDtParam.compID, piOrg[0 ], piCur[0 ], uiShift, piOrgLuma[size_t(0) ]); // iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[1 ], piCur[1 ], uiShift, piOrgLuma[size_t(1)<<cShift ]); // iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[2 ], piCur[2 ], uiShift, piOrgLuma[size_t(2)<<cShift ]); // iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[3 ], piCur[3 ], uiShift, piOrgLuma[size_t(3)<<cShift ]); // iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[4 ], piCur[4 ], uiShift, piOrgLuma[size_t(4)<<cShift ]); // iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[5 ], piCur[5 ], uiShift, piOrgLuma[size_t(5)<<cShift ]); // iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[6 ], piCur[6 ], uiShift, piOrgLuma[size_t(6)<<cShift ]); // iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[7 ], piCur[7 ], uiShift, piOrgLuma[size_t(7)<<cShift ]); // iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[8 ], piCur[8 ], uiShift, piOrgLuma[size_t(8)<<cShift ]); // iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[9 ], piCur[9 ], uiShift, 
piOrgLuma[size_t(9)<<cShift ]); // iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[10], piCur[10], uiShift, piOrgLuma[size_t(10)<<cShift]); // iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[11], piCur[11], uiShift, piOrgLuma[size_t(11)<<cShift]); // iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[12], piCur[12], uiShift, piOrgLuma[size_t(12)<<cShift]); // iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[13], piCur[13], uiShift, piOrgLuma[size_t(13)<<cShift]); // iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[14], piCur[14], uiShift, piOrgLuma[size_t(14)<<cShift]); // iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[15], piCur[15], uiShift, piOrgLuma[size_t(15)<<cShift]); // iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[16], piCur[16], uiShift, piOrgLuma[size_t(16)<<cShift]); // iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[17], piCur[17], uiShift, piOrgLuma[size_t(17)<<cShift]); // iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[18], piCur[18], uiShift, piOrgLuma[size_t(18)<<cShift]); // iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[19], piCur[19], uiShift, piOrgLuma[size_t(19)<<cShift]); // iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * 
iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[20], piCur[20], uiShift, piOrgLuma[size_t(20)<<cShift]); // iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[21], piCur[21], uiShift, piOrgLuma[size_t(21)<<cShift]); // iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[22], piCur[22], uiShift, piOrgLuma[size_t(22)<<cShift]); // iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[23], piCur[23], uiShift, piOrgLuma[size_t(23)<<cShift]); // iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[24], piCur[24], uiShift, piOrgLuma[size_t(24)<<cShift]); // iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[25], piCur[25], uiShift, piOrgLuma[size_t(25)<<cShift]); // iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[26], piCur[26], uiShift, piOrgLuma[size_t(26)<<cShift]); // iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[27], piCur[27], uiShift, piOrgLuma[size_t(27)<<cShift]); // iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[28], piCur[28], uiShift, piOrgLuma[size_t(28)<<cShift]); // iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[29], piCur[29], uiShift, piOrgLuma[size_t(29)<<cShift]); // iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[30], piCur[30], uiShift, 
piOrgLuma[size_t(30)<<cShift]); // iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[31], piCur[31], uiShift, piOrgLuma[size_t(31)<<cShift]); // iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[32], piCur[32], uiShift, piOrgLuma[size_t(32)<<cShift]); // iTemp = piOrg[32] - piCur[32]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[33], piCur[33], uiShift, piOrgLuma[size_t(33)<<cShift]); // iTemp = piOrg[33] - piCur[33]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[34], piCur[34], uiShift, piOrgLuma[size_t(34)<<cShift]); // iTemp = piOrg[34] - piCur[34]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[35], piCur[35], uiShift, piOrgLuma[size_t(35)<<cShift]); // iTemp = piOrg[35] - piCur[35]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[36], piCur[36], uiShift, piOrgLuma[size_t(36)<<cShift]); // iTemp = piOrg[36] - piCur[36]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[37], piCur[37], uiShift, piOrgLuma[size_t(37)<<cShift]); // iTemp = piOrg[37] - piCur[37]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[38], piCur[38], uiShift, piOrgLuma[size_t(38)<<cShift]); // iTemp = piOrg[38] - piCur[38]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[39], piCur[39], uiShift, piOrgLuma[size_t(39)<<cShift]); // iTemp = piOrg[39] - piCur[39]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[40], piCur[40], uiShift, piOrgLuma[size_t(40)<<cShift]); // iTemp = piOrg[40] - piCur[40]; uiSum += Distortion(( iTemp * 
iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[41], piCur[41], uiShift, piOrgLuma[size_t(41)<<cShift]); // iTemp = piOrg[41] - piCur[41]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[42], piCur[42], uiShift, piOrgLuma[size_t(42)<<cShift]); // iTemp = piOrg[42] - piCur[42]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[43], piCur[43], uiShift, piOrgLuma[size_t(43)<<cShift]); // iTemp = piOrg[43] - piCur[43]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[44], piCur[44], uiShift, piOrgLuma[size_t(44)<<cShift]); // iTemp = piOrg[44] - piCur[44]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[45], piCur[45], uiShift, piOrgLuma[size_t(45)<<cShift]); // iTemp = piOrg[45] - piCur[45]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[46], piCur[46], uiShift, piOrgLuma[size_t(46)<<cShift]); // iTemp = piOrg[46] - piCur[46]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[47], piCur[47], uiShift, piOrgLuma[size_t(47)<<cShift]); // iTemp = piOrg[47] - piCur[47]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[48], piCur[48], uiShift, piOrgLuma[size_t(48)<<cShift]); // iTemp = piOrg[48] - piCur[48]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[49], piCur[49], uiShift, piOrgLuma[size_t(49)<<cShift]); // iTemp = piOrg[49] - piCur[49]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[50], piCur[50], uiShift, piOrgLuma[size_t(50)<<cShift]); // iTemp = piOrg[50] - piCur[50]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[51], piCur[51], uiShift, 
piOrgLuma[size_t(51)<<cShift]); // iTemp = piOrg[51] - piCur[51]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[52], piCur[52], uiShift, piOrgLuma[size_t(52)<<cShift]); // iTemp = piOrg[52] - piCur[52]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[53], piCur[53], uiShift, piOrgLuma[size_t(53)<<cShift]); // iTemp = piOrg[53] - piCur[53]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[54], piCur[54], uiShift, piOrgLuma[size_t(54)<<cShift]); // iTemp = piOrg[54] - piCur[54]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[55], piCur[55], uiShift, piOrgLuma[size_t(55)<<cShift]); // iTemp = piOrg[55] - piCur[55]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[56], piCur[56], uiShift, piOrgLuma[size_t(56)<<cShift]); // iTemp = piOrg[56] - piCur[56]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[57], piCur[57], uiShift, piOrgLuma[size_t(57)<<cShift]); // iTemp = piOrg[57] - piCur[57]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[58], piCur[58], uiShift, piOrgLuma[size_t(58)<<cShift]); // iTemp = piOrg[58] - piCur[58]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[59], piCur[59], uiShift, piOrgLuma[size_t(59)<<cShift]); // iTemp = piOrg[59] - piCur[59]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[60], piCur[60], uiShift, piOrgLuma[size_t(60)<<cShift]); // iTemp = piOrg[60] - piCur[60]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[61], piCur[61], uiShift, piOrgLuma[size_t(61)<<cShift]); // iTemp = piOrg[61] - piCur[61]; uiSum += Distortion(( iTemp * 
iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[62], piCur[62], uiShift, piOrgLuma[size_t(62)<<cShift]); // iTemp = piOrg[62] - piCur[62]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); uiSum += getWeightedMSE(rcDtParam.compID, piOrg[63], piCur[63], uiShift, piOrgLuma[size_t(63)<<cShift]); // iTemp = piOrg[63] - piCur[63]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); piOrg += iStrideOrg; piCur += iStrideCur; piOrgLuma += iStrideOrgLuma<<cShift; } return ( uiSum ); } #endif Pel orgCopy[MAX_CU_SIZE * MAX_CU_SIZE]; #if _OPENMP #pragma omp threadprivate(orgCopy) #endif Distortion RdCost::xGetMRHADs( const DistParam &rcDtParam ) { const Pel offset = rcDtParam.org.meanDiff( rcDtParam.cur ); PelBuf modOrg( orgCopy, rcDtParam.org ); modOrg.copyFrom( rcDtParam.org ); modOrg.subtract( offset ); DistParam modDistParam = rcDtParam; modDistParam.org = modOrg; return m_afpDistortFunc[DF_HAD]( modDistParam ); } //! \}