Newer
Older

Karsten Suehring
committed
{
bits >>= 1;
y = x >> bits;
if( y )
{
x = y;
msb += bits;
}
}
msb += y;
return msb;
}
void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &yuvSrc0, const CPelUnitBuf &yuvSrc1, const int &refIdx0, const int &refIdx1, PelUnitBuf &yuvDst, const BitDepths &clipBitDepths)
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
const int height = yuvDst.Y().height;
const int width = yuvDst.Y().width;
int heightG = height + 2 * BIO_EXTEND_SIZE;
int widthG = width + 2 * BIO_EXTEND_SIZE;
int offsetPos = widthG*BIO_EXTEND_SIZE + BIO_EXTEND_SIZE;
Pel* gradX0 = m_gradX0;
Pel* gradX1 = m_gradX1;
Pel* gradY0 = m_gradY0;
Pel* gradY1 = m_gradY1;
int stridePredMC = widthG + 2;
const Pel* srcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + stridePredMC + 1;
const Pel* srcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + stridePredMC + 1;
const int src0Stride = stridePredMC;
const int src1Stride = stridePredMC;
Pel* dstY = yuvDst.Y().buf;
const int dstStride = yuvDst.Y().stride;
const Pel* srcY0Temp = srcY0;
const Pel* srcY1Temp = srcY1;
for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
{
Pel* dstTempPtr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + stridePredMC + 1;
Pel* gradY = (refList == 0) ? m_gradY0 : m_gradY1;
Pel* gradX = (refList == 0) ? m_gradX0 : m_gradX1;
xBioGradFilter(dstTempPtr, stridePredMC, widthG, heightG, widthG, gradX, gradY, clipBitDepths.recon[toChannelType(COMPONENT_Y)]);
Pel* padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 2;
for (int y = 0; y< height; y++)
padStr[-1] = padStr[0];
padStr[width] = padStr[width - 1];
padStr += stridePredMC;
padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 1;
::memcpy(padStr - stridePredMC, padStr, sizeof(Pel)*(widthG));
::memcpy(padStr + height*stridePredMC, padStr + (height - 1)*stridePredMC, sizeof(Pel)*(widthG));
}
const ClpRng& clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y);
const int bitDepth = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
const int shiftNum = IF_INTERNAL_PREC + 1 - bitDepth;
const int offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
const int limit = (1<<(std::max<int>(5, bitDepth - 7)));

Karsten Suehring
committed
int* dotProductTemp1 = m_dotProduct1;
int* dotProductTemp2 = m_dotProduct2;
int* dotProductTemp3 = m_dotProduct3;
int* dotProductTemp5 = m_dotProduct5;
int* dotProductTemp6 = m_dotProduct6;
xCalcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, widthG, widthG, heightG, bitDepth);
int xUnit = (width >> 2);
int yUnit = (height >> 2);
Pel *dstY0 = dstY;
gradX0 = m_gradX0; gradX1 = m_gradX1;
gradY0 = m_gradY0; gradY1 = m_gradY1;
for (int yu = 0; yu < yUnit; yu++)
{
for (int xu = 0; xu < xUnit; xu++)
{
#if !JVET_O0055_INT_DMVR_DIS_BDOF
if (m_bioPredSubBlkDist[yu*xUnit + xu] < m_bioSubBlkDistThres)
{
srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src1Stride + xu) << 2);
dstY0 = dstY + ((yu*dstStride + xu) << 2);
PelBuf dstPelBuf(dstY0, dstStride, Size(4, 4));
dstPelBuf.addAvg(CPelBuf(srcY0Temp, src0Stride, Size(4, 4)), CPelBuf(srcY1Temp, src1Stride, Size(4, 4)), clpRng);
#endif
int sGxdI = 0, sGydI = 0, sGxGy = 0, sGx2 = 0, sGy2 = 0;
int tmpx = 0, tmpy = 0;

Karsten Suehring
committed
dotProductTemp1 = m_dotProduct1 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp2 = m_dotProduct2 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp3 = m_dotProduct3 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp5 = m_dotProduct5 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp6 = m_dotProduct6 + offsetPos + ((yu*widthG + xu) << 2);
xCalcBlkGradient(xu << 2, yu << 2, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, sGx2, sGy2, sGxGy, sGxdI, sGydI, widthG, heightG, (1 << 2));
if (sGx2 > 0)
{
tmpx = rightShiftMSB(sGxdI << 3, sGx2);
tmpx = Clip3(-limit, limit, tmpx);
}
if (sGy2 > 0)
{
int mainsGxGy = sGxGy >> 12;
int secsGxGy = sGxGy & ((1 << 12) - 1);
int tmpData = tmpx * mainsGxGy;
tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1;
tmpy = rightShiftMSB(((sGydI << 3) - tmpData), sGy2);
tmpy = Clip3(-limit, limit, tmpy);
}
srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
gradX0 = m_gradX0 + offsetPos + ((yu*widthG + xu) << 2);
gradX1 = m_gradX1 + offsetPos + ((yu*widthG + xu) << 2);
gradY0 = m_gradY0 + offsetPos + ((yu*widthG + xu) << 2);
gradY1 = m_gradY1 + offsetPos + ((yu*widthG + xu) << 2);
dstY0 = dstY + ((yu*dstStride + xu) << 2);
xAddBIOAvg4(srcY0Temp, src0Stride, srcY1Temp, src1Stride, dstY0, dstStride, gradX0, gradX1, gradY0, gradY1, widthG, (1 << 2), (1 << 2), (int)tmpx, (int)tmpy, shiftNum, offset, clpRng);
} // xu
} // yu
}
bool InterPrediction::xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const BitDepths &clipBitDepths)
{
const int width = pu.lwidth();
const int height = pu.lheight();
const int clipbd = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(clipbd);
const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
const int xUnit = (width >> 2);
const int yUnit = (height >> 2);
m_bioDistThres = (shift <= 5) ? (((32 << (clipbd - 8))*width*height) >> (5 - shift)) : (((32 << (clipbd - 8))*width*height) << (shift - 5));
m_bioSubBlkDistThres = (shift <= 5) ? (((64 << (clipbd - 8)) << 4) >> (5 - shift)) : (((64 << (clipbd - 8)) << 4) << (shift - 5));
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
m_bioDistThres >>= distortionShift;
m_bioSubBlkDistThres >>= distortionShift;
DistParam cDistParam;
Distortion dist = 0;
for (int yu = 0, blkIdx = 0; yu < yUnit; yu++)
{
for (int xu = 0; xu < xUnit; xu++, blkIdx++)
{
const Pel* pPred0 = pYuvSrc0 + ((yu*src0Stride + xu) << 2);
const Pel* pPred1 = pYuvSrc1 + ((yu*src1Stride + xu) << 2);
m_pcRdCost->setDistParam(cDistParam, pPred0, pPred1, src0Stride, src1Stride, clipbd, COMPONENT_Y, (1 << 2), (1 << 2), 0, 1, false, true);
m_bioPredSubBlkDist[blkIdx] = cDistParam.distFunc(cDistParam);
dist += m_bioPredSubBlkDist[blkIdx];
}
}
return (dist >= m_bioDistThres);
}
void InterPrediction::xAddBIOAvg4(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
{
g_pelBufOP.addBIOAvg4(src0, src0Stride, src1, src1Stride, dst, dstStride, gradX0, gradX1, gradY0, gradY1, gradStride, width, height, tmpx, tmpy, shift, offset, clpRng);
}
void InterPrediction::xBioGradFilter(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, int bitDepth)
{
g_pelBufOP.bioGradFilter(pSrc, srcStride, width, height, gradStride, gradX, gradY, bitDepth);
}
void InterPrediction::xCalcBIOPar(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth)
{
g_pelBufOP.calcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, gradStride, widthG, heightG, bitDepth);
}
void InterPrediction::xCalcBlkGradient(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize)
{
g_pelBufOP.calcBlkGradient(sx, sy, arraysGx2, arraysGxGy, arraysGxdI, arraysGy2, arraysGydI, sGx2, sGy2, sGxGy, sGxdI, sGydI, width, height, unitSize);
}
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, PelUnitBuf* yuvDstTmp /*= NULL*/)
#else
void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied )

Karsten Suehring
committed
{
const int iRefIdx0 = pu.refIdx[0];
const int iRefIdx1 = pu.refIdx[1];
if( iRefIdx0 >= 0 && iRefIdx1 >= 0 )
{
if( pu.cu->GBiIdx != GBI_DEFAULT )
{
CHECK(bioApplied, "GBi is disallowed with BIO");
pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx);
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (yuvDstTmp)
yuvDstTmp->copyFrom(pcYuvDst);
#endif
if (bioApplied)
const int src0Stride = pu.lwidth() + 2 * BIO_EXTEND_SIZE + 2;
const int src1Stride = pu.lwidth() + 2 * BIO_EXTEND_SIZE + 2;
const Pel* pSrcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + 2 * src0Stride + 2;
const Pel* pSrcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + 2 * src1Stride + 2;
#if JVET_O0055_INT_DMVR_DIS_BDOF
bool bioEnabled = true;
#else
bool bioEnabled = xCalcBiPredSubBlkDist(pu, pSrcY0, src0Stride, pSrcY1, src1Stride, clipBitDepths);
#endif
if (bioEnabled)
{
applyBiOptFlow(pu, pcYuvSrc0, pcYuvSrc1, iRefIdx0, iRefIdx1, pcYuvDst, clipBitDepths);
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (yuvDstTmp)
yuvDstTmp->bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]);
#endif
}
else
{
pcYuvDst.bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]);
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (yuvDstTmp)
yuvDstTmp->bufs[0].copyFrom(pcYuvDst.bufs[0]);
#endif
Takeshi Chujoh
committed
if (pu.cs->pps->getWPBiPred())
{
const int iRefIdx0 = pu.refIdx[0];
const int iRefIdx1 = pu.refIdx[1];
WPScalingParam *pwp0;
WPScalingParam *pwp1;
getWpScaling(pu.cu->slice, iRefIdx0, iRefIdx1, pwp0, pwp1);
if (!bioApplied)
{
addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Y);
}
addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cb);
addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cr);
}
else
{
pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, bioApplied);
}
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (yuvDstTmp)
{
if (bioApplied)
{
yuvDstTmp->bufs[1].copyFrom(pcYuvDst.bufs[1]);
yuvDstTmp->bufs[2].copyFrom(pcYuvDst.bufs[2]);
}
else
yuvDstTmp->copyFrom(pcYuvDst);
}
#endif

Karsten Suehring
committed
}
else if( iRefIdx0 >= 0 && iRefIdx1 < 0 )
{
if( pu.cu->triangle )
{
pcYuvDst.copyFrom( pcYuvSrc0 );
}
else

Karsten Suehring
committed
pcYuvDst.copyClip( pcYuvSrc0, clpRngs );
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (yuvDstTmp)
yuvDstTmp->copyFrom(pcYuvDst);
#endif

Karsten Suehring
committed
}
else if( iRefIdx0 < 0 && iRefIdx1 >= 0 )
{
if( pu.cu->triangle )
{
pcYuvDst.copyFrom( pcYuvSrc1 );
}
else

Karsten Suehring
committed
pcYuvDst.copyClip( pcYuvSrc1, clpRngs );
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (yuvDstTmp)
yuvDstTmp->copyFrom(pcYuvDst);
#endif

Karsten Suehring
committed
}
}
void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
, PelUnitBuf* predBufWOBIO /*= NULL*/
#endif

Karsten Suehring
committed
{
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
CHECK(predBufWOBIO && pu.mhIntraFlag, "the case should not happen!");
#endif
if ((!luma || !chroma) && eRefPicList == REF_PIC_LIST_0)
#if !JVET_O0258_REMOVE_CHROMA_IBC_FOR_DUALTREE
if (!luma && chroma)
{
xChromaMC(pu, predBuf);
return;
}
else // (luma && !chroma)
{
xPredInterUni(pu, eRefPicList, predBuf, false
, false
, luma, chroma);
return;
#if !JVET_O0258_REMOVE_CHROMA_IBC_FOR_DUALTREE

Karsten Suehring
committed
CodingStructure &cs = *pu.cs;
const PPS &pps = *cs.pps;
const SliceType sliceType = cs.slice->getSliceType();
if( eRefPicList != REF_PIC_LIST_X )
{
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
CHECK(predBufWOBIO != NULL, "the case should not happen!");
#endif

Karsten Suehring
committed
if( ( ( sliceType == P_SLICE && pps.getUseWP() ) || ( sliceType == B_SLICE && pps.getWPBiPred() ) ) )
{
xPredInterUni ( pu, eRefPicList, predBuf, true
, false
, true, true
);

Karsten Suehring
committed
xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, -1, m_maxCompIDToPred );
}
else
{
xPredInterUni( pu, eRefPicList, predBuf, false
, false
, true, true
);

Karsten Suehring
committed
}
}
else
{
CHECK( !pu.cu->affine && pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0 && ( pu.lwidth() + pu.lheight() == 12 ), "invalid 4x8/8x4 bi-predicted blocks" );
WPScalingParam *wp0;
WPScalingParam *wp1;
int refIdx0 = pu.refIdx[REF_PIC_LIST_0];
int refIdx1 = pu.refIdx[REF_PIC_LIST_1];
pu.cs->slice->getWpScaling(REF_PIC_LIST_0, refIdx0, wp0);
pu.cs->slice->getWpScaling(REF_PIC_LIST_1, refIdx1, wp1);
bool bioApplied = false;
const Slice &slice = *pu.cs->slice;
#if JVET_O1140_SLICE_DISABLE_BDOF_DMVR_FLAG
if (pu.cs->sps->getBDOFEnabledFlag() && (!pu.cs->slice->getDisBdofDmvrFlag()))
#else
if (pu.cs->sps->getBDOFEnabledFlag())
{
if (pu.cu->affine || m_subPuMC)
{
bioApplied = false;
}
else
{
const bool biocheck0 = !((wp0[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Y].bPresentFlag) && slice.getSliceType() == B_SLICE);
const bool biocheck1 = !(pps.getUseWP() && slice.getSliceType() == P_SLICE);
if (biocheck0
&& biocheck1
&& PU::isBiPredFromDifferentDir(pu)
)
{
bioApplied = true;
}
}
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (bioApplied && pu.mhIntraFlag)
{
bioApplied = false;
}
#endif
if (pu.cu->cs->sps->getUseGBi() && bioApplied && pu.cu->GBiIdx != GBI_DEFAULT)
if (pu.mmvdEncOptMode == 2 && pu.mmvdMergeFlag)
bioApplied = false;
}
}
bool dmvrApplied = false;
dmvrApplied = (pu.mvRefine) && PU::checkDMVRCondition(pu);
if ((pu.lumaSize().width > MAX_BDOF_APPLICATION_REGION || pu.lumaSize().height > MAX_BDOF_APPLICATION_REGION) && pu.mergeType != MRG_TYPE_SUBPU_ATMVP && (bioApplied && !dmvrApplied))
{
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
xSubPuBio(pu, predBuf, eRefPicList, predBufWOBIO);
#else
if (pu.mergeType != MRG_TYPE_DEFAULT_N && pu.mergeType != MRG_TYPE_IBC)

Karsten Suehring
committed
{
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
CHECK(predBufWOBIO != NULL, "the case should not happen!");
#endif

Karsten Suehring
committed
xSubPuMC( pu, predBuf, eRefPicList );
}
else if( xCheckIdenticalMotion( pu ) )
{
xPredInterUni( pu, REF_PIC_LIST_0, predBuf, false
, false
, true, true
);
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (predBufWOBIO)
predBufWOBIO->copyFrom(predBuf);
#endif

Karsten Suehring
committed
}
else
{
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
xPredInterBi(pu, predBuf, predBufWOBIO);
#else

Karsten Suehring
committed
xPredInterBi( pu, predBuf );

Karsten Suehring
committed
}
}
return;
}
void InterPrediction::motionCompensation( CodingUnit &cu, const RefPicList &eRefPicList

Karsten Suehring
committed
{
for( auto &pu : CU::traversePUs( cu ) )
{
PelUnitBuf predBuf = cu.cs->getPredBuf( pu );
pu.mvRefine = true;
motionCompensation( pu, predBuf, eRefPicList
pu.mvRefine = false;

Karsten Suehring
committed
}
}
void InterPrediction::motionCompensation( PredictionUnit &pu, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/

Karsten Suehring
committed
{
PelUnitBuf predBuf = pu.cs->getPredBuf( pu );
motionCompensation( pu, predBuf, eRefPicList

Karsten Suehring
committed
}
int InterPrediction::rightShiftMSB(int numer, int denom)
{
int d;
int msbIdx = 0;
for (msbIdx = 0; msbIdx<32; msbIdx++)
{
if (denom < ((int)1 << msbIdx))
{
break;
}
}

Karsten Suehring
committed
int shiftIdx = msbIdx - 1;
d = (numer >> shiftIdx);

Karsten Suehring
committed

Karsten Suehring
committed
void InterPrediction::motionCompensation4Triangle( CodingUnit &cu, MergeCtx &triangleMrgCtx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1 )
{
for( auto &pu : CU::traversePUs( cu ) )
{
const UnitArea localUnitArea( cu.cs->area.chromaFormat, Area( 0, 0, pu.lwidth(), pu.lheight() ) );
PelUnitBuf tmpTriangleBuf = m_triangleBuf.getBuf( localUnitArea );
PelUnitBuf predBuf = cu.cs->getPredBuf( pu );
PU::spanMotionInfo( pu );
motionCompensation( pu, tmpTriangleBuf );
{
if( g_mctsDecCheckEnabled && !MCTSHelper::checkMvBufferForMCTSConstraint( pu, true ) )
{
printf( "DECODER_TRIANGLE_PU: pu motion vector across tile boundaries (%d,%d,%d,%d)\n", pu.lx(), pu.ly(), pu.lwidth(), pu.lheight() );
}
}
PU::spanMotionInfo( pu );
motionCompensation( pu, predBuf );
{
if( g_mctsDecCheckEnabled && !MCTSHelper::checkMvBufferForMCTSConstraint( pu, true ) )
{
printf( "DECODER_TRIANGLE_PU: pu motion vector across tile boundaries (%d,%d,%d,%d)\n", pu.lx(), pu.ly(), pu.lwidth(), pu.lheight() );
}
}
weightedTriangleBlk( pu, splitDir, MAX_NUM_CHANNEL_TYPE, predBuf, tmpTriangleBuf, predBuf );
void InterPrediction::weightedTriangleBlk( PredictionUnit &pu, const bool splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
if( channel == CHANNEL_TYPE_LUMA )
{
Yuling Hsiao
committed
#if JVET_O0280_SIMD_TRIANGLE_WEIGHTING
m_if.weightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
#else
xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
Yuling Hsiao
committed
#endif
}
else if( channel == CHANNEL_TYPE_CHROMA )
{
Yuling Hsiao
committed
#if JVET_O0280_SIMD_TRIANGLE_WEIGHTING
m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
#else
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
Yuling Hsiao
committed
#endif
Yuling Hsiao
committed
#if JVET_O0280_SIMD_TRIANGLE_WEIGHTING
m_if.weightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
#else
xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
Yuling Hsiao
committed
#endif
Yuling Hsiao
committed
#if !JVET_O0280_SIMD_TRIANGLE_WEIGHTING
void InterPrediction::xWeightedTriangleBlk( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
{
Pel* dst = predDst .get(compIdx).buf;
Pel* src0 = predSrc0.get(compIdx).buf;
Pel* src1 = predSrc1.get(compIdx).buf;
int32_t strideDst = predDst .get(compIdx).stride - width;
int32_t strideSrc0 = predSrc0.get(compIdx).stride - width;
int32_t strideSrc1 = predSrc1.get(compIdx).stride - width;
const char log2WeightBase = 3;
const ClpRng clipRng = pu.cu->slice->clpRngs().comp[compIdx];
const int32_t clipbd = clipRng.bd;
const int32_t shiftDefault = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
const int32_t offsetDefault = (1<<(shiftDefault-1)) + IF_INTERNAL_OFFS;
const int32_t shiftWeighted = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase;
const int32_t offsetWeighted = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase);
const int32_t ratioWH = (width > height) ? (width / height) : 1;
const int32_t ratioHW = (width > height) ? 1 : (height / width);
const bool longWeight = (compIdx == COMPONENT_Y);
const int32_t weightedLength = longWeight ? 7 : 3;
int32_t weightedStartPos = ( splitDir == 0 ) ? ( 0 - (weightedLength >> 1) * ratioWH ) : ( width - ((weightedLength + 1) >> 1) * ratioWH );
int32_t weightedEndPos = weightedStartPos + weightedLength * ratioWH - 1;
int32_t weightedPosoffset =( splitDir == 0 ) ? ratioWH : -ratioWH;
Pel tmpPelWeighted;
int32_t weightIdx;
int32_t x, y, tmpX, tmpY, tmpWeightedStart, tmpWeightedEnd;
*dst++ = ClipPel( rightShift( (splitDir == 0 ? *src1 : *src0) + offsetDefault, shiftDefault), clipRng );
src0++;
src1++;
tmpWeightedStart = std::max((int32_t)0, weightedStartPos);
tmpWeightedEnd = std::min(weightedEndPos, (int32_t)(width - 1));
weightIdx += abs(weightedStartPos) / ratioWH;
for( x = tmpWeightedStart; x <= tmpWeightedEnd; x+= ratioWH )
tmpPelWeighted = Clip3( 1, 7, longWeight ? weightIdx : (weightIdx * 2));
tmpPelWeighted = splitDir ? ( 8 - tmpPelWeighted ) : tmpPelWeighted;
*dst++ = ClipPel( rightShift( (tmpPelWeighted*(*src0++) + ((8 - tmpPelWeighted) * (*src1++)) + offsetWeighted), shiftWeighted ), clipRng );
*dst++ = ClipPel( rightShift( (splitDir == 0 ? *src0 : *src1) + offsetDefault, shiftDefault ), clipRng );
src0++;
src1++;
dst += strideDst;
src0 += strideSrc0;
src1 += strideSrc1;
weightedStartPos += weightedPosoffset;
weightedEndPos += weightedPosoffset;
Yuling Hsiao
committed
#endif
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
#if JVET_O0297_DMVR_PADDING // For Dec speedup
void InterPrediction::xPrefetch(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId, bool forLuma)
{
int offset, width, height;
Mv cMv;
const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
int mvShift = (MV_FRACTIONAL_BITS_INTERNAL);
int start = 0;
int end = MAX_NUM_COMPONENT;
start = forLuma ? 0 : 1;
end = forLuma ? 1 : MAX_NUM_COMPONENT;
for (int compID = start; compID < end; compID++)
{
cMv = Mv(pu.mv[refId].getHor(), pu.mv[refId].getVer());
pcPad.bufs[compID].stride = (pcPad.bufs[compID].width + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA);
int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;
width = pcPad.bufs[compID].width;
height = pcPad.bufs[compID].height;
offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1);
int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
width += (filtersize - 1);
height += (filtersize - 1);
cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp),
-(((filtersize >> 1) - 1) << mvshiftTemp));
clipMv(cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
/* Pre-fetch similar to HEVC*/
{
CPelBuf refBuf;
Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp);
refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, Rec_offset, pu.blocks[compID].size()));
PelBuf &dstBuf = pcPad.bufs[compID];
g_pelBufOP.copyBuffer((Pel *)refBuf.buf, refBuf.stride, ((Pel *)dstBuf.buf) + offset, dstBuf.stride, width, height);
}
}
}
void InterPrediction::xPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId)
{
int offset = 0, width, height;
int padsize;
Mv cMv;
for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
{
int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;
width = pcPad.bufs[compID].width;
height = pcPad.bufs[compID].height;
offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1);
padsize = (DMVR_NUM_ITERATION) >> getComponentScaleX((ComponentID)compID, pu.chromaFormat);
width += (filtersize - 1);
height += (filtersize - 1);
/*padding on all side of size DMVR_PAD_LENGTH*/
{
g_pelBufOP.padding(pcPad.bufs[compID].buf + offset, pcPad.bufs[compID].stride, width, height, padsize);
}
}
}
#else
void InterPrediction::xPrefetchPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId)
{
int offset, width, height;
int padsize;
Mv cMv;
const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
int mvShift = (MV_FRACTIONAL_BITS_INTERNAL);
for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
{
cMv = Mv(pu.mv[refId].getHor(), pu.mv[refId].getVer());
pcPad.bufs[compID].stride = (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA);
int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;
width = pcPad.bufs[compID].width;
height = pcPad.bufs[compID].height;
offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1);
padsize = (DMVR_NUM_ITERATION) >> getComponentScaleX((ComponentID)compID, pu.chromaFormat);
int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
width += (filtersize - 1);
height += (filtersize - 1);
cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp),
-(((filtersize >> 1) - 1) << mvshiftTemp));
bool wrapRef = false;
if( pu.cs->sps->getWrapAroundEnabledFlag() )
{
wrapRef = wrapClipMv( cMv, pu.blocks[0].pos(), pu.blocks[0].size(), pu.cs->sps);
}
else {
clipMv(cMv, pu.lumaPos(), pu.lumaSize(),*pu.cs->sps);
}
/* Pre-fetch similar to HEVC*/
{
CPelBuf refBuf;
Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp);
refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, Rec_offset, pu.blocks[compID].size()), wrapRef);
PelBuf &dstBuf = pcPad.bufs[compID];
g_pelBufOP.copyBuffer((Pel *)refBuf.buf, refBuf.stride, ((Pel *)dstBuf.buf) + offset, dstBuf.stride, width, height);
#if JVET_J0090_MEMORY_BANDWITH_MEASURE
JVET_J0090_SET_REF_PICTURE( refPic, (ComponentID)compID );
for ( int row = 0 ; row < height ; row++ )
{
for ( int col = 0 ; col < width ; col++ )
{
JVET_J0090_CACHE_ACCESS( ((Pel *)refBuf.buf) + row * refBuf.stride + col, __FILE__, __LINE__ );
}
}
#endif
}
/*padding on all side of size DMVR_PAD_LENGTH*/
{
g_pelBufOP.padding(pcPad.bufs[compID].buf + offset, pcPad.bufs[compID].stride, width, height, padsize);
}
}
}
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
inline int32_t div_for_maxq7(int64_t N, int64_t D)
{
int32_t sign, q;
sign = 0;
if (N < 0)
{
sign = 1;
N = -N;
}
q = 0;
D = (D << 3);
if (N >= D)
{
N -= D;
q++;
}
q = (q << 1);
D = (D >> 1);
if (N >= D)
{
N -= D;
q++;
}
q = (q << 1);
if (N >= (D >> 1))
q++;
if (sign)
return (-q);
return(q);
}
void xSubPelErrorSrfc(uint64_t *sadBuffer, int32_t *deltaMv)
{
int64_t numerator, denominator;
int32_t mvDeltaSubPel;
int32_t mvSubPelLvl = 4;/*1: half pel, 2: Qpel, 3:1/8, 4: 1/16*/
numerator = (int64_t)((sadBuffer[1] - sadBuffer[3]) << mvSubPelLvl);
denominator = (int64_t)((sadBuffer[1] + sadBuffer[3] - (sadBuffer[0] << 1)));
{
if ((sadBuffer[1] != sadBuffer[0]) && (sadBuffer[3] != sadBuffer[0]))
{
mvDeltaSubPel = div_for_maxq7(numerator, denominator);
deltaMv[0] = (mvDeltaSubPel);
}
else
{
if (sadBuffer[1] == sadBuffer[0])
{
deltaMv[0] = -8;// half pel
}
else
{
deltaMv[0] = 8;// half pel
}
}
}
/*vertical*/
numerator = (int64_t)((sadBuffer[2] - sadBuffer[4]) << mvSubPelLvl);
denominator = (int64_t)((sadBuffer[2] + sadBuffer[4] - (sadBuffer[0] << 1)));
if (0 != denominator)
{
if ((sadBuffer[2] != sadBuffer[0]) && (sadBuffer[4] != sadBuffer[0]))
{
mvDeltaSubPel = div_for_maxq7(numerator, denominator);
deltaMv[1] = (mvDeltaSubPel);
}
else
{
if (sadBuffer[2] == sadBuffer[0])
{
deltaMv[1] = -8;// half pel
}
else
{
deltaMv[1] = 8;// half pel
}
}
}
return;
}
void InterPrediction::xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& minCost, int16_t *deltaMV, uint64_t *pSADsArray, int width, int height)
{
const int32_t refStrideL0 = m_biLinearBufStride;
const int32_t refStrideL1 = m_biLinearBufStride;
Pel *pRefL0Orig = pRefL0;
Pel *pRefL1Orig = pRefL1;
int32_t sadOffset = ((m_pSearchOffset[nIdx].getVer() * ((2 * DMVR_NUM_ITERATION) + 1)) + m_pSearchOffset[nIdx].getHor());
pRefL0 = pRefL0Orig + m_pSearchOffset[nIdx].hor + (m_pSearchOffset[nIdx].ver * refStrideL0);
pRefL1 = pRefL1Orig - m_pSearchOffset[nIdx].hor - (m_pSearchOffset[nIdx].ver * refStrideL1);
if (*(pSADsArray + sadOffset) == MAX_UINT64)
{
const uint64_t cost = xDMVRCost(bd, pRefL0, refStrideL0, pRefL1, refStrideL1, width, height);
minCost = *(pSADsArray + sadOffset);
deltaMV[0] = m_pSearchOffset[nIdx].getHor();
deltaMV[1] = m_pSearchOffset[nIdx].getVer();
}
}
}
void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bioApplied
, const Mv mergeMV[NUM_REF_PIC_LIST_01]
#if JVET_O0297_DMVR_PADDING // For Dec speedup
, bool blockMoved
#endif
)
{
int offset, deltaIntMvX, deltaIntMvY;
PelUnitBuf pcYUVTemp = pcYuvSrc0;
PelUnitBuf pcPadTemp = pcPad0;
/*always high precision MVs are used*/
for (int k = 0; k < NUM_REF_PIC_LIST_01; k++)
{
RefPicList refId = (RefPicList)k;
Mv cMv = pu.mv[refId];
m_iRefListIdx = refId;
const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
Mv cMvClipped = cMv;
clipMv(cMvClipped, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
Mv startMv = mergeMV[refId];
if( g_mctsDecCheckEnabled && !MCTSHelper::checkMvForMCTSConstraint( pu, startMv, MV_PRECISION_INTERNAL ) )
{
const Area& tileArea = pu.cs->picture->mctsInfo.getTileArea();
printf( "Attempt an access over tile boundary at block %d,%d %d,%d with MV %d,%d (in Tile TL: %d,%d BR: %d,%d)\n",
pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), startMv.getHor(), startMv.getVer(), tileArea.topLeft().x, tileArea.topLeft().y, tileArea.bottomRight().x, tileArea.bottomRight().y );
THROW( "MCTS constraint failed!" );
}
for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
{
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
#if JVET_O0297_DMVR_PADDING // For Dec speedup
Pel *srcBufPelPtr = NULL;
int pcPadstride = 0;
if (blockMoved || (compID == 0))
{
pcPadstride = pcPadTemp.bufs[compID].stride;
int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
int leftPixelExtra;
if (compID == COMPONENT_Y)
{
leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
}
else
{
leftPixelExtra = (NTAPS_CHROMA >> 1) - 1;
}
PelBuf &srcBuf = pcPadTemp.bufs[compID];
deltaIntMvX = (cMv.getHor() >> mvshiftTemp) -
(startMv.getHor() >> mvshiftTemp);
deltaIntMvY = (cMv.getVer() >> mvshiftTemp) -
(startMv.getVer() >> mvshiftTemp);
CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement");
offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (pcPadTemp.bufs[compID].stride + 1);
offset += (deltaIntMvY)* pcPadTemp.bufs[compID].stride;
offset += (deltaIntMvX);
srcBufPelPtr = (srcBuf.buf + offset);
}
xPredInterBlk((ComponentID)compID, pu, refPic, cMvClipped, pcYUVTemp, true, pu.cs->slice->getClpRngs().comp[compID],
bioApplied, false, 0, 0, 0, srcBufPelPtr, pcPadstride);
#else
int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
int leftPixelExtra;
if (compID == COMPONENT_Y)
{
leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
}
else
{
leftPixelExtra = (NTAPS_CHROMA >> 1) - 1;
}
deltaIntMvX = (cMv.getHor() >> mvshiftTemp) -
(startMv.getHor() >> mvshiftTemp);
deltaIntMvY = (cMv.getVer() >> mvshiftTemp) -
(startMv.getVer() >> mvshiftTemp);
CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement");
offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (pcPadTemp.bufs[compID].stride + 1);
offset += (deltaIntMvY)* pcPadTemp.bufs[compID].stride;
offset += (deltaIntMvX);
PelBuf &srcBuf = pcPadTemp.bufs[compID];
xPredInterBlk((ComponentID)compID, pu, refPic, cMvClipped, pcYUVTemp, true, pu.cs->slice->getClpRngs().comp[compID],
bioApplied, false, 0, 0, 0, (srcBuf.buf + offset), pcPadTemp.bufs[compID].stride);
}
pcYUVTemp = pcYuvSrc1;
pcPadTemp = pcPad1;
}
}
uint64_t InterPrediction::xDMVRCost(int bitDepth, Pel* pOrg, uint32_t refStride, const Pel* pRef, uint32_t orgStride, int width, int height)
{
DistParam cDistParam;
cDistParam.applyWeight = false;
cDistParam.useMR = false;
m_pcRdCost->setDistParam(cDistParam, pOrg, pRef, orgStride, refStride, bitDepth, COMPONENT_Y, width, height, 1);
uint64_t uiCost = cDistParam.distFunc(cDistParam);
return uiCost;
}
void xDMVRSubPixelErrorSurface(bool notZeroCost, int16_t *totalDeltaMV, int16_t *deltaMV, uint64_t *pSADsArray)
int sadStride = (((2 * DMVR_NUM_ITERATION) + 1));
uint64_t sadbuffer[5];
if (notZeroCost && (abs(totalDeltaMV[0]) != (2 << MV_FRACTIONAL_BITS_INTERNAL))
&& (abs(totalDeltaMV[1]) != (2 << MV_FRACTIONAL_BITS_INTERNAL)))
sadbuffer[0] = pSADsArray[0];
sadbuffer[1] = pSADsArray[-1];
sadbuffer[2] = pSADsArray[-sadStride];
sadbuffer[3] = pSADsArray[1];
sadbuffer[4] = pSADsArray[sadStride];
xSubPelErrorSrfc(sadbuffer, tempDeltaMv);
totalDeltaMV[0] += tempDeltaMv[0];
totalDeltaMV[1] += tempDeltaMv[1];
}
}
void InterPrediction::xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs)
{
const int refIdx0 = pu.refIdx[0];
const int refIdx1 = pu.refIdx[1];
/*use merge MV as starting MV*/
Mv mergeMVL0(pu.mv[REF_PIC_LIST_0]);
Mv mergeMVL1(pu.mv[REF_PIC_LIST_1]);