/* The copyright in this software is being made available under the BSD * License, included below. This software may be subject to other third party * and contributor rights, including patent rights, and no such rights are * granted under this license. * * Copyright (c) 2010-2019, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may * be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /** \file WeightPredAnalysis.cpp \brief weighted prediction encoder class */ #include "../CommonLib/CommonDef.h" #include "../CommonLib/Slice.h" #include "../CommonLib/Picture.h" #include "WeightPredAnalysis.h" #include <limits> static const double WEIGHT_PRED_SAD_RELATIVE_TO_NON_WEIGHT_PRED_SAD=0.99; // NOTE: U0040 used 0.95 //! calculate SAD values for both WP version and non-WP version. static int64_t xCalcSADvalueWP(const int bitDepth, const Pel *pOrgPel, const Pel *pRefPel, const int width, const int height, const int orgStride, const int refStride, const int log2Denom, const int weight, const int offset, const bool useHighPrecision); //! calculate SAD values for both WP version and non-WP version. static int64_t xCalcSADvalueWPOptionalClip(const int bitDepth, const Pel *pOrgPel, const Pel *pRefPel, const int width, const int height, const int orgStride, const int refStride, const int log2Denom, const int weight, const int offset, const bool useHighPrecision, const bool clipped); // ----------------------------------------------------------------------------- // Helper functions //! calculate Histogram for array of pixels static void xCalcHistogram(const Pel *pPel, std::vector<int> &histogram, const int width, const int height, const int stride, const int maxPel) { histogram.clear(); histogram.resize(maxPel); for( int y = 0; y < height; y++ ) { for( int x = 0; x < width; x++ ) { const Pel v=pPel[x]; histogram[v<0?0:(v>=maxPel)?maxPel-1:v]++; } pPel += stride; } } static Distortion xCalcHistDistortion (const std::vector<int> &histogram0, const std::vector<int> &histogram1) { Distortion distortion = 0; CHECK(histogram0.size()!=histogram1.size(), "Different histogram sizes"); const int numElements=int(histogram0.size()); // Scan histograms to compute histogram distortion for (int i = 0; i <= numElements; i++) { distortion += (Distortion)(abs(histogram0[i] - histogram1[i])); } return distortion; } static void xScaleHistogram(const std::vector<int> &histogramInput, std::vector<int> &histogramOutput, // cannot be the same as the input const int bitDepth, const int log2Denom, const int weight, const int offset, const bool bHighPrecision) { CHECK(&histogramInput == &histogramOutput, "Input and output histogram are the same"); const int numElements=int(histogramInput.size()); histogramOutput.clear(); histogramOutput.resize(numElements); const int64_t iRealLog2Denom = bHighPrecision ? 0 : (bitDepth - 8); const int64_t iRealOffset = ((int64_t)offset)<<iRealLog2Denom; const int divOffset = log2Denom == 0 ? 0 : 1 << (log2Denom - 1); // Scan histogram and apply illumination parameters appropriately // Then compute updated histogram. // Note that this technique only works with single list weights/offsets. for (int i = 0; i < numElements; i++) { const int j = Clip3(0, numElements - 1, (int)(((weight * i + divOffset) >> log2Denom) + iRealOffset)); histogramOutput[j] += histogramInput[i]; } } static Distortion xSearchHistogram(const std::vector<int> &histogramSource, const std::vector<int> &histogramRef, std::vector<int> &outputHistogram, const int bitDepth, const int log2Denom, int &weightToUpdate, int &offsetToUpdate, const bool bHighPrecision, const ComponentID compID) { const int initialWeight = weightToUpdate; const int initialOffset = offsetToUpdate; const int weightRange = 10; const int offsetRange = 10; const int maxOffset = 1 << ((bHighPrecision == true) ? (bitDepth - 1) : 7); const int range = bHighPrecision ? (1<<bitDepth) / 2 : 128; const int defaultWeight = (1<<log2Denom); const int minSearchWeight = std::max<int>(initialWeight - weightRange, defaultWeight - range); const int maxSearchWeight = std::min<int>(initialWeight + weightRange+1, defaultWeight + range); Distortion minDistortion = std::numeric_limits<Distortion>::max(); int bestWeight = initialWeight; int bestOffset = initialOffset; for (int searchWeight = minSearchWeight; searchWeight < maxSearchWeight; searchWeight++) { if (compID == COMPONENT_Y) { for (int searchOffset = std::max<int>(initialOffset - offsetRange, -maxOffset); searchOffset <= initialOffset + offsetRange && searchOffset<=(maxOffset-1); searchOffset++) { xScaleHistogram(histogramRef, outputHistogram, bitDepth, log2Denom, searchWeight, searchOffset, bHighPrecision); const Distortion distortion = xCalcHistDistortion(histogramSource, outputHistogram); if (distortion < minDistortion) { minDistortion = distortion; bestWeight = searchWeight; bestOffset = searchOffset; } } } else { const int pred = ( maxOffset - ( ( maxOffset*searchWeight)>>(log2Denom) ) ); for (int searchOffset = initialOffset - offsetRange; searchOffset <= initialOffset + offsetRange; searchOffset++) { const int deltaOffset = Clip3( -4*maxOffset, 4*maxOffset-1, (searchOffset - pred) ); // signed 10bit (if !bHighPrecision) const int clippedOffset = Clip3( -1*maxOffset, 1*maxOffset-1, (deltaOffset + pred) ); // signed 8bit (if !bHighPrecision) xScaleHistogram(histogramRef, outputHistogram, bitDepth, log2Denom, searchWeight, clippedOffset, bHighPrecision); const Distortion distortion = xCalcHistDistortion(histogramSource, outputHistogram); if (distortion < minDistortion) { minDistortion = distortion; bestWeight = searchWeight; bestOffset = clippedOffset; } } } } weightToUpdate = bestWeight; offsetToUpdate = bestOffset; // regenerate best histogram xScaleHistogram(histogramRef, outputHistogram, bitDepth, log2Denom, bestWeight, bestOffset, bHighPrecision); return minDistortion; } // ----------------------------------------------------------------------------- // Member functions WeightPredAnalysis::WeightPredAnalysis() { for ( uint32_t lst =0 ; lst<NUM_REF_PIC_LIST_01 ; lst++ ) { for ( int refIdx=0 ; refIdx<MAX_NUM_REF ; refIdx++ ) { for ( int comp=0 ; comp<MAX_NUM_COMPONENT ;comp++ ) { WPScalingParam *pwp = &(m_wp[lst][refIdx][comp]); pwp->bPresentFlag = false; pwp->uiLog2WeightDenom = 0; pwp->iWeight = 1; pwp->iOffset = 0; } } } } //! calculate AC and DC values for current original image void WeightPredAnalysis::xCalcACDCParamSlice(Slice *const slice) { //===== calculate AC/DC value ===== // PicYuv* pPic = slice->getPic()->getPicYuvOrg(); const CPelUnitBuf pPic = slice->getPic()->getOrigBuf(); WPACDCParam weightACDCParam[MAX_NUM_COMPONENT]; for(int componentIndex = 0; componentIndex < ::getNumberValidComponents(pPic.chromaFormat); componentIndex++) { const ComponentID compID = ComponentID(componentIndex); const CPelBuf compBuf = pPic.get( compID ); // calculate DC/AC value for channel const int stride = compBuf.stride; const int width = compBuf.width; const int height = compBuf.height; const int sample = width*height; int64_t orgDC = 0; { const Pel *pPel = compBuf.buf; for(int y = 0; y < height; y++, pPel+=stride ) { for(int x = 0; x < width; x++ ) { orgDC += (int)( pPel[x] ); } } } const int64_t orgNormDC = ((orgDC+(sample>>1)) / sample); int64_t orgAC = 0; { const Pel *pPel = compBuf.buf; for(int y = 0; y < height; y++, pPel += stride ) { for(int x = 0; x < width; x++ ) { orgAC += abs( (int)pPel[x] - (int)orgNormDC ); } } } const int fixedBitShift = (slice->getSPS()->getSpsRangeExtension().getHighPrecisionOffsetsEnabledFlag())?RExt__PREDICTION_WEIGHTING_ANALYSIS_DC_PRECISION:0; weightACDCParam[compID].iDC = (((orgDC<<fixedBitShift)+(sample>>1)) / sample); weightACDCParam[compID].iAC = orgAC; } slice->setWpAcDcParam(weightACDCParam); } //! check weighted pred or non-weighted pred void WeightPredAnalysis::xCheckWPEnable(Slice *const slice) { // const PicYuv *pPic = slice->getPic()->getPicYuvOrg(); int presentCnt = 0; for ( uint32_t lst=0 ; lst<NUM_REF_PIC_LIST_01 ; lst++ ) { for ( int refIdx=0 ; refIdx<MAX_NUM_REF ; refIdx++ ) { for(int componentIndex = 0; componentIndex < ::getNumberValidComponents( slice->getSPS()->getChromaFormatIdc() ); componentIndex++) { WPScalingParam *pwp = &(m_wp[lst][refIdx][componentIndex]); presentCnt += (int)pwp->bPresentFlag; } } } if(presentCnt==0) { slice->setTestWeightPred(false); slice->setTestWeightBiPred(false); for ( uint32_t lst=0 ; lst<NUM_REF_PIC_LIST_01 ; lst++ ) { for ( int refIdx=0 ; refIdx<MAX_NUM_REF ; refIdx++ ) { for(int componentIndex = 0; componentIndex < ::getNumberValidComponents( slice->getSPS()->getChromaFormatIdc() ); componentIndex++) { WPScalingParam *pwp = &(m_wp[lst][refIdx][componentIndex]); pwp->bPresentFlag = false; pwp->uiLog2WeightDenom = 0; pwp->iWeight = 1; pwp->iOffset = 0; } } } slice->setWpScaling( m_wp ); } else { slice->setTestWeightPred (slice->getPPS()->getUseWP()); slice->setTestWeightBiPred(slice->getPPS()->getWPBiPred()); } } //! estimate wp tables for explicit wp void WeightPredAnalysis::xEstimateWPParamSlice(Slice *const slice, const WeightedPredictionMethod method) { int iDenom = 6; bool validRangeFlag = false; if(slice->getNumRefIdx(REF_PIC_LIST_0)>3) { iDenom = 7; } do { validRangeFlag = xUpdatingWPParameters(slice, iDenom); if (!validRangeFlag) { iDenom--; // decrement to satisfy the range limitation } } while (validRangeFlag == false); // selecting whether WP is used, or not (fast search) // NOTE: This is not operating on a slice, but the entire picture. switch (method) { case WP_PER_PICTURE_WITH_SIMPLE_DC_COMBINED_COMPONENT: xSelectWP(slice, iDenom); break; case WP_PER_PICTURE_WITH_SIMPLE_DC_PER_COMPONENT: xSelectWPHistExtClip(slice, iDenom, false, false, false); break; case WP_PER_PICTURE_WITH_HISTOGRAM_AND_PER_COMPONENT: xSelectWPHistExtClip(slice, iDenom, false, false, true); break; case WP_PER_PICTURE_WITH_HISTOGRAM_AND_PER_COMPONENT_AND_CLIPPING: xSelectWPHistExtClip(slice, iDenom, false, true, true); break; case WP_PER_PICTURE_WITH_HISTOGRAM_AND_PER_COMPONENT_AND_CLIPPING_AND_EXTENSION: xSelectWPHistExtClip(slice, iDenom, true, true, true); break; default: THROW("Invalid WP method"); break; } slice->setWpScaling( m_wp ); } //! update wp tables for explicit wp w.r.t range limitation bool WeightPredAnalysis::xUpdatingWPParameters(Slice *const slice, const int log2Denom) { const int numComp = ::getNumberValidComponents( slice->getSPS()->getChromaFormatIdc() ); const bool bUseHighPrecisionWeighting = slice->getSPS()->getSpsRangeExtension().getHighPrecisionOffsetsEnabledFlag(); const int numPredDir = slice->isInterP() ? 1 : 2; CHECK(numPredDir > int(NUM_REF_PIC_LIST_01), "Invalid reference picture list"); for ( int refList = 0; refList < numPredDir; refList++ ) { const RefPicList eRefPicList = ( refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 ); for ( int refIdxTemp = 0; refIdxTemp < slice->getNumRefIdx(eRefPicList); refIdxTemp++ ) { const WPACDCParam *currWeightACDCParam, *refWeightACDCParam; slice->getWpAcDcParam(currWeightACDCParam); slice->getRefPic(eRefPicList, refIdxTemp)->slices[0]->getWpAcDcParam(refWeightACDCParam); for ( int comp = 0; comp < numComp; comp++ ) { const ComponentID compID = ComponentID(comp); const int bitDepth = slice->getSPS()->getBitDepth(toChannelType(compID)); const int range = bUseHighPrecisionWeighting ? (1<<bitDepth)/2 : 128; const int realLog2Denom = log2Denom + (bUseHighPrecisionWeighting ? RExt__PREDICTION_WEIGHTING_ANALYSIS_DC_PRECISION : (bitDepth - 8)); const int realOffset = ((int)1<<(realLog2Denom-1)); // current frame const int64_t currDC = currWeightACDCParam[comp].iDC; const int64_t currAC = currWeightACDCParam[comp].iAC; // reference frame const int64_t refDC = refWeightACDCParam[comp].iDC; const int64_t refAC = refWeightACDCParam[comp].iAC; // calculating iWeight and iOffset params const double dWeight = (refAC==0) ? (double)1.0 : Clip3( -16.0, 15.0, ((double)currAC / (double)refAC) ); const int weight = (int)( 0.5 + dWeight * (double)(1<<log2Denom) ); const int offset = (int)( ((currDC<<log2Denom) - ((int64_t)weight * refDC) + (int64_t)realOffset) >> realLog2Denom ); int clippedOffset; if(isChroma(compID)) // Chroma offset range limination { const int pred = ( range - ( ( range*weight)>>(log2Denom) ) ); const int deltaOffset = Clip3( -4*range, 4*range-1, (offset - pred) ); // signed 10bit clippedOffset = Clip3( -range, range-1, (deltaOffset + pred) ); // signed 8bit } else // Luma offset range limitation { clippedOffset = Clip3( -range, range-1, offset); } // Weighting factor limitation const int defaultWeight = (1<<log2Denom); const int deltaWeight = (weight - defaultWeight); if(deltaWeight >= range || deltaWeight < -range) { return false; } m_wp[refList][refIdxTemp][comp].bPresentFlag = true; m_wp[refList][refIdxTemp][comp].iWeight = weight; m_wp[refList][refIdxTemp][comp].iOffset = clippedOffset; m_wp[refList][refIdxTemp][comp].uiLog2WeightDenom = log2Denom; } } } return true; } /** select whether weighted pred enables or not. * \param Slice *slice * \param log2Denom * \returns bool */ bool WeightPredAnalysis::xSelectWPHistExtClip(Slice *const slice, const int log2Denom, const bool bDoEnhancement, const bool bClipInitialSADWP, const bool bUseHistogram) { const CPelUnitBuf pPic = slice->getPic()->getOrigBuf(); const int defaultWeight = 1<<log2Denom; const int numPredDir = slice->isInterP() ? 1 : 2; const bool useHighPrecision = slice->getSPS()->getSpsRangeExtension().getHighPrecisionOffsetsEnabledFlag(); CHECK(numPredDir > int(NUM_REF_PIC_LIST_01), "Invalid reference picture list"); for ( int refList = 0; refList < numPredDir; refList++ ) { const RefPicList eRefPicList = ( refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 ); for ( int refIdxTemp = 0; refIdxTemp < slice->getNumRefIdx(eRefPicList); refIdxTemp++ ) { bool useChromaWeight = false; for (int comp = 0; comp < ::getNumberValidComponents(pPic.chromaFormat); comp++) { const ComponentID compID = ComponentID(comp); const Pel *pRef = slice->getRefPic(eRefPicList, refIdxTemp)->getRecoBuf().get(compID).buf; const int refStride = slice->getRefPic(eRefPicList, refIdxTemp)->getRecoBuf().get(compID).stride;; const CPelBuf compBuf = pPic.get( compID ); const Pel *pOrg = compBuf.buf; const int orgStride = compBuf.stride; const int width = compBuf.width; const int height = compBuf.height; const int bitDepth = slice->getSPS()->getBitDepth(toChannelType(compID)); WPScalingParam &wp = m_wp[refList][refIdxTemp][compID]; int weight = wp.iWeight; int offset = wp.iOffset; int weightDef = defaultWeight; int offsetDef = 0; // calculate SAD costs with/without wp for luma const int64_t SADnoWP = xCalcSADvalueWPOptionalClip(bitDepth, pOrg, pRef, width, height, orgStride, refStride, log2Denom, defaultWeight, 0, useHighPrecision, bClipInitialSADWP); if (SADnoWP > 0) { const int64_t SADWP = xCalcSADvalueWPOptionalClip(bitDepth, pOrg, pRef, width, height, orgStride, refStride, log2Denom, weight, offset, useHighPrecision, bClipInitialSADWP); const double dRatioSAD = (double)SADWP / (double)SADnoWP; double dRatioSr0SAD = std::numeric_limits<double>::max(); double dRatioSrSAD = std::numeric_limits<double>::max(); if (bUseHistogram) { std::vector<int> histogramOrg;// = pPic->getHistogram(compID); std::vector<int> histogramRef;// = slice->getRefPic(eRefPicList, refIdxTemp)->getPicYuvRec()->getHistogram(compID); std::vector<int> searchedHistogram; // Compute histograms xCalcHistogram(pOrg, histogramOrg, width, height, orgStride, 1 << bitDepth); xCalcHistogram(pRef, histogramRef, width, height, refStride, 1 << bitDepth); // Do a histogram search around DC WP parameters; resulting distortion and 'searchedHistogram' is discarded xSearchHistogram(histogramOrg, histogramRef, searchedHistogram, bitDepth, log2Denom, weight, offset, useHighPrecision, compID); // calculate updated WP SAD const int64_t SADSrWP = xCalcSADvalueWP(bitDepth, pOrg, pRef, width, height, orgStride, refStride, log2Denom, weight, offset, useHighPrecision); dRatioSrSAD = (double)SADSrWP / (double)SADnoWP; if (bDoEnhancement) { // Do the same around the default ones; resulting distortion and 'searchedHistogram' is discarded xSearchHistogram(histogramOrg, histogramRef, searchedHistogram, bitDepth, log2Denom, weightDef, offsetDef, useHighPrecision, compID); // calculate updated WP SAD const int64_t SADSr0WP = xCalcSADvalueWP(bitDepth, pOrg, pRef, width, height, orgStride, refStride, log2Denom, weightDef, offsetDef, useHighPrecision); dRatioSr0SAD = (double)SADSr0WP / (double)SADnoWP; } } if(std::min(dRatioSr0SAD, std::min(dRatioSAD, dRatioSrSAD)) >= WEIGHT_PRED_SAD_RELATIVE_TO_NON_WEIGHT_PRED_SAD) { wp.bPresentFlag = false; wp.iOffset = 0; wp.iWeight = defaultWeight; wp.uiLog2WeightDenom = log2Denom; } else { if (compID != COMPONENT_Y) { useChromaWeight = true; } if (dRatioSr0SAD < dRatioSrSAD && dRatioSr0SAD < dRatioSAD) { wp.bPresentFlag = true; wp.iOffset = offsetDef; wp.iWeight = weightDef; wp.uiLog2WeightDenom = log2Denom; } else if (dRatioSrSAD < dRatioSAD) { wp.bPresentFlag = true; wp.iOffset = offset; wp.iWeight = weight; wp.uiLog2WeightDenom = log2Denom; } } } else // (SADnoWP <= 0) { wp.bPresentFlag = false; wp.iOffset = 0; wp.iWeight = defaultWeight; wp.uiLog2WeightDenom = log2Denom; } } for (int comp = 1; comp < ::getNumberValidComponents(pPic.chromaFormat); comp++) { m_wp[refList][refIdxTemp][comp].bPresentFlag = useChromaWeight; } } } return true; } //! select whether weighted pred enables or not. bool WeightPredAnalysis::xSelectWP(Slice *const slice, const int log2Denom) { const CPelUnitBuf pPic = slice->getPic()->getOrigBuf(); const int defaultWeight = 1<<log2Denom; const int numPredDir = slice->isInterP() ? 1 : 2; const bool useHighPrecisionPredictionWeighting = slice->getSPS()->getSpsRangeExtension().getHighPrecisionOffsetsEnabledFlag(); CHECK(numPredDir > int(NUM_REF_PIC_LIST_01), "Invalid reference picture list"); for ( int refList = 0; refList < numPredDir; refList++ ) { const RefPicList eRefPicList = ( refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 ); for ( int refIdxTemp = 0; refIdxTemp < slice->getNumRefIdx(eRefPicList); refIdxTemp++ ) { int64_t SADWP = 0, SADnoWP = 0; for (int comp = 0; comp < ::getNumberValidComponents(pPic.chromaFormat); comp++) { const ComponentID compID = ComponentID(comp); const CPelBuf compBuf = pPic.get( compID ); const Pel *pRef = slice->getRefPic(eRefPicList, refIdxTemp)->getRecoBuf().get( compID ).buf; const int refStride = slice->getRefPic(eRefPicList, refIdxTemp)->getRecoBuf().get( compID ).stride; const Pel *pOrg = compBuf.buf; const int orgStride = compBuf.stride; const int width = compBuf.width; const int height = compBuf.height; const int bitDepth = slice->getSPS()->getBitDepth(toChannelType(compID)); // calculate SAD costs with/without wp for luma SADWP += xCalcSADvalueWP(bitDepth, pOrg, pRef, width, height, orgStride, refStride, log2Denom, m_wp[refList][refIdxTemp][compID].iWeight, m_wp[refList][refIdxTemp][compID].iOffset, useHighPrecisionPredictionWeighting); SADnoWP += xCalcSADvalueWP(bitDepth, pOrg, pRef, width, height, orgStride, refStride, log2Denom, defaultWeight, 0, useHighPrecisionPredictionWeighting); } const double dRatio = SADnoWP > 0 ? (((double)SADWP / (double)SADnoWP)) : std::numeric_limits<double>::max(); const double dMaxRatio = double( 0.99 ); if(dRatio >= dMaxRatio) { for(int comp=0; comp < ::getNumberValidComponents(pPic.chromaFormat); comp++) { WPScalingParam &wp=m_wp[refList][refIdxTemp][comp]; wp.bPresentFlag = false; wp.iOffset = 0; wp.iWeight = defaultWeight; wp.uiLog2WeightDenom = log2Denom; } } } } return true; } // Alternatively, a SSE-based measure could be used instead. // The respective function has been removed as it currently redundant. static int64_t xCalcSADvalueWP(const int bitDepth, const Pel *pOrgPel, const Pel *pRefPel, const int width, const int height, const int orgStride, const int refStride, const int log2Denom, const int weight, const int offset, const bool useHighPrecision) { //const int64_t iSize = iWidth*iHeight; const int64_t realLog2Denom = useHighPrecision ? log2Denom : (log2Denom + (bitDepth - 8)); const int64_t realOffset = ((int64_t)offset)<<realLog2Denom; int64_t SAD = 0; for( int y = 0; y < height; y++ ) { for( int x = 0; x < width; x++ ) { SAD += abs(( ((int64_t)pOrgPel[x] << (int64_t) log2Denom) - ( (int64_t) pRefPel[x] * (int64_t) weight + (realOffset) ) ) ); } pOrgPel += orgStride; pRefPel += refStride; } //return (iSAD/iSize); return SAD; } static int64_t xCalcSADvalueWPOptionalClip(const int bitDepth, const Pel *pOrgPel, const Pel *pRefPel, const int width, const int height, const int orgStride, const int refStride, const int log2Denom, const int weight, const int offset, const bool useHighPrecision, const bool clipped) { int64_t SAD = 0; if (clipped) { const int64_t realLog2Denom = useHighPrecision ? 0 : (bitDepth - 8); const int64_t realOffset = (int64_t)offset<<realLog2Denom; const int64_t roundOffset = (log2Denom == 0) ? 0 : 1 << (log2Denom - 1); const int64_t minValue = 0; const int64_t maxValue = (1 << bitDepth) - 1; for( int y = 0; y < height; y++ ) { for( int x = 0; x < width; x++ ) { int64_t scaledValue = Clip3(minValue, maxValue, ((((int64_t) pRefPel[x] * (int64_t) weight + roundOffset) ) >> (int64_t) log2Denom) + realOffset); //ClipPel SAD += abs((int64_t)pOrgPel[x] - scaledValue); } pOrgPel += orgStride; pRefPel += refStride; } } else { //const int64_t iSize = iWidth*iHeight; const int64_t realLog2Denom = useHighPrecision ? log2Denom : (log2Denom + (bitDepth - 8)); const int64_t realOffset = ((int64_t)offset)<<realLog2Denom; for( int y = 0; y < height; y++ ) { for( int x = 0; x < width; x++ ) { SAD += abs(( ((int64_t)pOrgPel[x] << (int64_t) log2Denom) - ( (int64_t) pRefPel[x] * (int64_t) weight + (realOffset) ) ) ); } pOrgPel += orgStride; pRefPel += refStride; } } return SAD; }