Newer
Older
gradX1 = m_gradX1 + offsetPos + ((yu*widthG + xu) << 2);
gradY0 = m_gradY0 + offsetPos + ((yu*widthG + xu) << 2);
gradY1 = m_gradY1 + offsetPos + ((yu*widthG + xu) << 2);
dstY0 = dstY + ((yu*dstStride + xu) << 2);
xAddBIOAvg4(srcY0Temp, src0Stride, srcY1Temp, src1Stride, dstY0, dstStride, gradX0, gradX1, gradY0, gradY1, widthG, (1 << 2), (1 << 2), (int)tmpx, (int)tmpy, shiftNum, offset, clpRng);
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
} // xu
} // yu
}
bool InterPrediction::xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const BitDepths &clipBitDepths)
{
const int width = pu.lwidth();
const int height = pu.lheight();
const int clipbd = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(clipbd);
const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
const int xUnit = (width >> 2);
const int yUnit = (height >> 2);
m_bioDistThres = (shift <= 5) ? (((32 << (clipbd - 8))*width*height) >> (5 - shift)) : (((32 << (clipbd - 8))*width*height) << (shift - 5));
m_bioSubBlkDistThres = (shift <= 5) ? (((64 << (clipbd - 8)) << 4) >> (5 - shift)) : (((64 << (clipbd - 8)) << 4) << (shift - 5));
m_bioDistThres >>= distortionShift;
m_bioSubBlkDistThres >>= distortionShift;
DistParam cDistParam;
Distortion dist = 0;
for (int yu = 0, blkIdx = 0; yu < yUnit; yu++)
{
for (int xu = 0; xu < xUnit; xu++, blkIdx++)
{
const Pel* pPred0 = pYuvSrc0 + ((yu*src0Stride + xu) << 2);
const Pel* pPred1 = pYuvSrc1 + ((yu*src1Stride + xu) << 2);
m_pcRdCost->setDistParam(cDistParam, pPred0, pPred1, src0Stride, src1Stride, clipbd, COMPONENT_Y, (1 << 2), (1 << 2), 0, 1, false, true);
m_bioPredSubBlkDist[blkIdx] = cDistParam.distFunc(cDistParam);
dist += m_bioPredSubBlkDist[blkIdx];
}
}
return (dist >= m_bioDistThres);
}
void InterPrediction::xAddBIOAvg4(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
{
g_pelBufOP.addBIOAvg4(src0, src0Stride, src1, src1Stride, dst, dstStride, gradX0, gradX1, gradY0, gradY1, gradStride, width, height, tmpx, tmpy, shift, offset, clpRng);
}
void InterPrediction::xBioGradFilter(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, int bitDepth)
{
g_pelBufOP.bioGradFilter(pSrc, srcStride, width, height, gradStride, gradX, gradY, bitDepth);
}
void InterPrediction::xCalcBIOPar(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth)
{
g_pelBufOP.calcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, gradStride, widthG, heightG, bitDepth);
}
void InterPrediction::xCalcBlkGradient(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize)
{
g_pelBufOP.calcBlkGradient(sx, sy, arraysGx2, arraysGxGy, arraysGxdI, arraysGy2, arraysGydI, sGx2, sGy2, sGxGy, sGxdI, sGydI, width, height, unitSize);
}
void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied )

Karsten Suehring
committed
{
const int iRefIdx0 = pu.refIdx[0];
const int iRefIdx1 = pu.refIdx[1];
if( iRefIdx0 >= 0 && iRefIdx1 >= 0 )
{
if( pu.cu->GBiIdx != GBI_DEFAULT )
{
CHECK(bioApplied, "GBi is disallowed with BIO");
pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx);
return;
}
if (bioApplied)
const int src0Stride = pu.lwidth() + 2 * BIO_EXTEND_SIZE + 2;
const int src1Stride = pu.lwidth() + 2 * BIO_EXTEND_SIZE + 2;
const Pel* pSrcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + 2 * src0Stride + 2;
const Pel* pSrcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + 2 * src1Stride + 2;
bool bioEnabled = xCalcBiPredSubBlkDist(pu, pSrcY0, src0Stride, pSrcY1, src1Stride, clipBitDepths);
if (bioEnabled)
{
applyBiOptFlow(pu, pcYuvSrc0, pcYuvSrc1, iRefIdx0, iRefIdx1, pcYuvDst, clipBitDepths);
}
else
{
pcYuvDst.bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]);
}
}
pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, bioApplied);

Karsten Suehring
committed
}
else if( iRefIdx0 >= 0 && iRefIdx1 < 0 )
{
if( pu.cu->triangle )
{
pcYuvDst.copyFrom( pcYuvSrc0 );
}
else

Karsten Suehring
committed
pcYuvDst.copyClip( pcYuvSrc0, clpRngs );
}
else if( iRefIdx0 < 0 && iRefIdx1 >= 0 )
{
if( pu.cu->triangle )
{
pcYuvDst.copyFrom( pcYuvSrc1 );
}
else

Karsten Suehring
committed
pcYuvDst.copyClip( pcYuvSrc1, clpRngs );
}
}
void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList

Karsten Suehring
committed
{
if ((!luma || !chroma) && eRefPicList == REF_PIC_LIST_0)
{
if (!luma && chroma)
{
xChromaMC(pu, predBuf);
return;
}
else // (luma && !chroma)
{
xPredInterUni(pu, eRefPicList, predBuf, false
, false
, luma, chroma);
return;
}
}
// else, go with regular MC below

Karsten Suehring
committed
CodingStructure &cs = *pu.cs;
const PPS &pps = *cs.pps;
const SliceType sliceType = cs.slice->getSliceType();
if( eRefPicList != REF_PIC_LIST_X )
{
if( ( ( sliceType == P_SLICE && pps.getUseWP() ) || ( sliceType == B_SLICE && pps.getWPBiPred() ) ) )
{
xPredInterUni ( pu, eRefPicList, predBuf, true
, false
, true, true
);

Karsten Suehring
committed
xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, -1, m_maxCompIDToPred );
}
else
{
xPredInterUni( pu, eRefPicList, predBuf, false
, false
, true, true
);

Karsten Suehring
committed
}
}
else
{
if (pu.mergeType != MRG_TYPE_DEFAULT_N && pu.mergeType != MRG_TYPE_IBC)

Karsten Suehring
committed
{
xSubPuMC( pu, predBuf, eRefPicList );
}
else if( xCheckIdenticalMotion( pu ) )
{
xPredInterUni( pu, REF_PIC_LIST_0, predBuf, false
, false
, true, true
);

Karsten Suehring
committed
}
else
{
xPredInterBi( pu, predBuf );
}
}
return;
}
void InterPrediction::motionCompensation( CodingUnit &cu, const RefPicList &eRefPicList

Karsten Suehring
committed
{
for( auto &pu : CU::traversePUs( cu ) )
{
PelUnitBuf predBuf = cu.cs->getPredBuf( pu );
pu.mvRefine = true;
motionCompensation( pu, predBuf, eRefPicList
pu.mvRefine = false;

Karsten Suehring
committed
}
}
void InterPrediction::motionCompensation( PredictionUnit &pu, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/

Karsten Suehring
committed
{
PelUnitBuf predBuf = pu.cs->getPredBuf( pu );
motionCompensation( pu, predBuf, eRefPicList

Karsten Suehring
committed
}
int InterPrediction::rightShiftMSB(int numer, int denom)
{
int d;
int msbIdx = 0;
for (msbIdx = 0; msbIdx<32; msbIdx++)
{
if (denom < ((int)1 << msbIdx))
{
break;
}
}

Karsten Suehring
committed
int shiftIdx = msbIdx - 1;
d = (numer >> shiftIdx);

Karsten Suehring
committed

Karsten Suehring
committed
void InterPrediction::motionCompensation4Triangle( CodingUnit &cu, MergeCtx &triangleMrgCtx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1 )
{
for( auto &pu : CU::traversePUs( cu ) )
{
const UnitArea localUnitArea( cu.cs->area.chromaFormat, Area( 0, 0, pu.lwidth(), pu.lheight() ) );
PelUnitBuf tmpTriangleBuf = m_triangleBuf.getBuf( localUnitArea );
PelUnitBuf predBuf = cu.cs->getPredBuf( pu );
PU::spanMotionInfo( pu );
motionCompensation( pu, tmpTriangleBuf );
{
if( g_mctsDecCheckEnabled && !MCTSHelper::checkMvBufferForMCTSConstraint( pu, true ) )
{
printf( "DECODER_TRIANGLE_PU: pu motion vector across tile boundaries (%d,%d,%d,%d)\n", pu.lx(), pu.ly(), pu.lwidth(), pu.lheight() );
}
}
PU::spanMotionInfo( pu );
motionCompensation( pu, predBuf );
{
if( g_mctsDecCheckEnabled && !MCTSHelper::checkMvBufferForMCTSConstraint( pu, true ) )
{
printf( "DECODER_TRIANGLE_PU: pu motion vector across tile boundaries (%d,%d,%d,%d)\n", pu.lx(), pu.ly(), pu.lwidth(), pu.lheight() );
}
}
weightedTriangleBlk( pu, splitDir, MAX_NUM_CHANNEL_TYPE, predBuf, tmpTriangleBuf, predBuf );
void InterPrediction::weightedTriangleBlk( PredictionUnit &pu, const bool splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
if( channel == CHANNEL_TYPE_LUMA )
{
xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
}
else if( channel == CHANNEL_TYPE_CHROMA )
{
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
}
else
{
xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
}
void InterPrediction::xWeightedTriangleBlk( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
{
Pel* dst = predDst .get(compIdx).buf;
Pel* src0 = predSrc0.get(compIdx).buf;
Pel* src1 = predSrc1.get(compIdx).buf;
int32_t strideDst = predDst .get(compIdx).stride - width;
int32_t strideSrc0 = predSrc0.get(compIdx).stride - width;
int32_t strideSrc1 = predSrc1.get(compIdx).stride - width;
const char log2WeightBase = 3;
const ClpRng clipRng = pu.cu->slice->clpRngs().comp[compIdx];
const int32_t clipbd = clipRng.bd;
const int32_t shiftDefault = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
const int32_t offsetDefault = (1<<(shiftDefault-1)) + IF_INTERNAL_OFFS;
const int32_t shiftWeighted = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase;
const int32_t offsetWeighted = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase);
const int32_t ratioWH = (width > height) ? (width / height) : 1;
const int32_t ratioHW = (width > height) ? 1 : (height / width);
const bool longWeight = (compIdx == COMPONENT_Y) || ( predDst.chromaFormat == CHROMA_444 );
const int32_t weightedLength = longWeight ? 7 : 3;
int32_t weightedStartPos = ( splitDir == 0 ) ? ( 0 - (weightedLength >> 1) * ratioWH ) : ( width - ((weightedLength + 1) >> 1) * ratioWH );
int32_t weightedEndPos = weightedStartPos + weightedLength * ratioWH - 1;
int32_t weightedPosoffset =( splitDir == 0 ) ? ratioWH : -ratioWH;
Pel tmpPelWeighted;
int32_t weightIdx;
int32_t x, y, tmpX, tmpY, tmpWeightedStart, tmpWeightedEnd;
*dst++ = ClipPel( rightShift( (splitDir == 0 ? *src1 : *src0) + offsetDefault, shiftDefault), clipRng );
src0++;
src1++;
tmpWeightedStart = std::max((int32_t)0, weightedStartPos);
tmpWeightedEnd = std::min(weightedEndPos, (int32_t)(width - 1));
weightIdx += abs(weightedStartPos) / ratioWH;
for( x = tmpWeightedStart; x <= tmpWeightedEnd; x+= ratioWH )
tmpPelWeighted = Clip3( 1, 7, longWeight ? weightIdx : (weightIdx * 2));
tmpPelWeighted = splitDir ? ( 8 - tmpPelWeighted ) : tmpPelWeighted;
*dst++ = ClipPel( rightShift( (tmpPelWeighted*(*src0++) + ((8 - tmpPelWeighted) * (*src1++)) + offsetWeighted), shiftWeighted ), clipRng );
*dst++ = ClipPel( rightShift( (splitDir == 0 ? *src0 : *src1) + offsetDefault, shiftDefault ), clipRng );
src0++;
src1++;
dst += strideDst;
src0 += strideSrc0;
src1 += strideSrc1;
weightedStartPos += weightedPosoffset;
weightedEndPos += weightedPosoffset;
void InterPrediction::xPrefetchPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId)
{
int offset, width, height;
int padsize;
Mv cMv;
const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
int mvShift = (MV_FRACTIONAL_BITS_INTERNAL);
for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
{
cMv = Mv(pu.mv[refId].getHor(), pu.mv[refId].getVer());
pcPad.bufs[compID].stride = (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA);
int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;
width = pcPad.bufs[compID].width;
height = pcPad.bufs[compID].height;
offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1);
padsize = (DMVR_NUM_ITERATION) >> getComponentScaleX((ComponentID)compID, pu.chromaFormat);
int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
width += (filtersize - 1);
height += (filtersize - 1);
cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp),
-(((filtersize >> 1) - 1) << mvshiftTemp));
clipMv(cMv, pu.lumaPos(), pu.lumaSize(),*pu.cs->sps);
/* Pre-fetch similar to HEVC*/
{
CPelBuf refBuf;
Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp);
refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, Rec_offset, pu.blocks[compID].size()));
PelBuf &dstBuf = pcPad.bufs[compID];
g_pelBufOP.copyBuffer((Pel *)refBuf.buf, refBuf.stride, ((Pel *)dstBuf.buf) + offset, dstBuf.stride, width, height);
}
/*padding on all side of size DMVR_PAD_LENGTH*/
{
g_pelBufOP.padding(pcPad.bufs[compID].buf + offset, pcPad.bufs[compID].stride, width, height, padsize);
}
}
}
inline int32_t div_for_maxq7(int64_t N, int64_t D)
{
int32_t sign, q;
sign = 0;
if (N < 0)
{
sign = 1;
N = -N;
}
q = 0;
D = (D << 3);
if (N >= D)
{
N -= D;
q++;
}
q = (q << 1);
D = (D >> 1);
if (N >= D)
{
N -= D;
q++;
}
q = (q << 1);
if (N >= (D >> 1))
q++;
if (sign)
return (-q);
return(q);
}
void xSubPelErrorSrfc(uint64_t *sadBuffer, int32_t *deltaMv)
{
int64_t numerator, denominator;
int32_t mvDeltaSubPel;
int32_t mvSubPelLvl = 4;/*1: half pel, 2: Qpel, 3:1/8, 4: 1/16*/
numerator = (int64_t)((sadBuffer[1] - sadBuffer[3]) << mvSubPelLvl);
denominator = (int64_t)((sadBuffer[1] + sadBuffer[3] - (sadBuffer[0] << 1)));
{
if ((sadBuffer[1] != sadBuffer[0]) && (sadBuffer[3] != sadBuffer[0]))
{
mvDeltaSubPel = div_for_maxq7(numerator, denominator);
deltaMv[0] = (mvDeltaSubPel);
}
else
{
if (sadBuffer[1] == sadBuffer[0])
{
deltaMv[0] = -8;// half pel
}
else
{
deltaMv[0] = 8;// half pel
}
}
}
/*vertical*/
numerator = (int64_t)((sadBuffer[2] - sadBuffer[4]) << mvSubPelLvl);
denominator = (int64_t)((sadBuffer[2] + sadBuffer[4] - (sadBuffer[0] << 1)));
if (0 != denominator)
{
if ((sadBuffer[2] != sadBuffer[0]) && (sadBuffer[4] != sadBuffer[0]))
{
mvDeltaSubPel = div_for_maxq7(numerator, denominator);
deltaMv[1] = (mvDeltaSubPel);
}
else
{
if (sadBuffer[2] == sadBuffer[0])
{
deltaMv[1] = -8;// half pel
}
else
{
deltaMv[1] = 8;// half pel
}
}
}
return;
}
void InterPrediction::xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& minCost, int16_t *deltaMV, uint64_t *pSADsArray, int width, int height)
{
const int32_t refStrideL0 = m_biLinearBufStride;
const int32_t refStrideL1 = m_biLinearBufStride;
Pel *pRefL0Orig = pRefL0;
Pel *pRefL1Orig = pRefL1;
int32_t sadOffset = ((m_pSearchOffset[nIdx].getVer() * ((2 * DMVR_NUM_ITERATION) + 1)) + m_pSearchOffset[nIdx].getHor());
pRefL0 = pRefL0Orig + m_pSearchOffset[nIdx].hor + (m_pSearchOffset[nIdx].ver * refStrideL0);
pRefL1 = pRefL1Orig - m_pSearchOffset[nIdx].hor - (m_pSearchOffset[nIdx].ver * refStrideL1);
if (*(pSADsArray + sadOffset) == MAX_UINT64)
{
const uint64_t cost = xDMVRCost(bd, pRefL0, refStrideL0, pRefL1, refStrideL1, width, height);
minCost = *(pSADsArray + sadOffset);
deltaMV[0] = m_pSearchOffset[nIdx].getHor();
deltaMV[1] = m_pSearchOffset[nIdx].getVer();
}
}
}
void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bioApplied
, const Mv mergeMV[NUM_REF_PIC_LIST_01]
)
{
int offset, deltaIntMvX, deltaIntMvY;
PelUnitBuf pcYUVTemp = pcYuvSrc0;
PelUnitBuf pcPadTemp = pcPad0;
/*always high precision MVs are used*/
for (int k = 0; k < NUM_REF_PIC_LIST_01; k++)
{
RefPicList refId = (RefPicList)k;
Mv cMv = pu.mv[refId];
m_iRefListIdx = refId;
const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
Mv cMvClipped = cMv;
clipMv(cMvClipped, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
Mv startMv = mergeMV[refId];
if( g_mctsDecCheckEnabled && !MCTSHelper::checkMvForMCTSConstraint( pu, startMv, MV_PRECISION_INTERNAL ) )
{
const Area& tileArea = pu.cs->picture->mctsInfo.getTileArea();
printf( "Attempt an access over tile boundary at block %d,%d %d,%d with MV %d,%d (in Tile TL: %d,%d BR: %d,%d)\n",
pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), startMv.getHor(), startMv.getVer(), tileArea.topLeft().x, tileArea.topLeft().y, tileArea.bottomRight().x, tileArea.bottomRight().y );
THROW( "MCTS constraint failed!" );
}
for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
{
int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
int leftPixelExtra;
if (compID == COMPONENT_Y)
{
leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
}
else
{
leftPixelExtra = (NTAPS_CHROMA >> 1) - 1;
}
deltaIntMvX = (cMv.getHor() >> mvshiftTemp) -
(startMv.getHor() >> mvshiftTemp);
deltaIntMvY = (cMv.getVer() >> mvshiftTemp) -
(startMv.getVer() >> mvshiftTemp);
CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement");
offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (pcPadTemp.bufs[compID].stride + 1);
offset += (deltaIntMvY)* pcPadTemp.bufs[compID].stride;
offset += (deltaIntMvX);
PelBuf &srcBuf = pcPadTemp.bufs[compID];
xPredInterBlk((ComponentID)compID, pu, refPic, cMvClipped, pcYUVTemp, true, pu.cs->slice->getClpRngs().comp[compID],
bioApplied, false, 0, 0, 0, (srcBuf.buf + offset), pcPadTemp.bufs[compID].stride);
}
pcYUVTemp = pcYuvSrc1;
pcPadTemp = pcPad1;
}
}
uint64_t InterPrediction::xDMVRCost(int bitDepth, Pel* pOrg, uint32_t refStride, const Pel* pRef, uint32_t orgStride, int width, int height)
{
DistParam cDistParam;
cDistParam.applyWeight = false;
cDistParam.useMR = false;
m_pcRdCost->setDistParam(cDistParam, pOrg, pRef, orgStride, refStride, bitDepth, COMPONENT_Y, width, height, 1);
uint64_t uiCost = cDistParam.distFunc(cDistParam);
return uiCost;
}
void xDMVRSubPixelErrorSurface(bool notZeroCost, int16_t *totalDeltaMV, int16_t *deltaMV, uint64_t *pSADsArray)
int sadStride = (((2 * DMVR_NUM_ITERATION) + 1));
uint64_t sadbuffer[5];
if (notZeroCost && (abs(totalDeltaMV[0]) != (2 << MV_FRACTIONAL_BITS_INTERNAL))
&& (abs(totalDeltaMV[1]) != (2 << MV_FRACTIONAL_BITS_INTERNAL)))
sadbuffer[0] = pSADsArray[0];
sadbuffer[1] = pSADsArray[-1];
sadbuffer[2] = pSADsArray[-sadStride];
sadbuffer[3] = pSADsArray[1];
sadbuffer[4] = pSADsArray[sadStride];
xSubPelErrorSrfc(sadbuffer, tempDeltaMv);
totalDeltaMV[0] += tempDeltaMv[0];
totalDeltaMV[1] += tempDeltaMv[1];
}
}
void InterPrediction::xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs)
{
const int refIdx0 = pu.refIdx[0];
const int refIdx1 = pu.refIdx[1];
/*use merge MV as starting MV*/
Mv mergeMVL0(pu.mv[REF_PIC_LIST_0]);
Mv mergeMVL1(pu.mv[REF_PIC_LIST_1]);
/*Clip the starting MVs*/
clipMv(mergeMVL0, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
clipMv(mergeMVL1, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
/*L0 MC for refinement*/
{
int offset;
int leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y].stride + 1);
offset += (-(int)DMVR_NUM_ITERATION)* (int)m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y].stride;
offset += (-(int)DMVR_NUM_ITERATION);
PelBuf srcBuf = m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y];
PelUnitBuf yuvPredTempL0 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL0,
(MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)), pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION)));
xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_0, refIdx0), mergeMVL0, yuvPredTempL0, true, clpRngs.comp[COMPONENT_Y],
false, false, pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION), true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride
);
}
/*L1 MC for refinement*/
{
int offset;
int leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y].stride + 1);
offset += (-(int)DMVR_NUM_ITERATION)* (int)m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y].stride;
offset += (-(int)DMVR_NUM_ITERATION);
PelBuf srcBuf = m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y];
PelUnitBuf yuvPredTempL1 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL1,
(MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)), pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION)));
xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_1, refIdx1), mergeMVL1, yuvPredTempL1, true, clpRngs.comp[COMPONENT_Y],
false, false, pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION), true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride
);
}
}
void InterPrediction::xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, const ClpRngs &clpRngs, const bool bioApplied)
/*Always High Precision*/
int mvShift = MV_FRACTIONAL_BITS_INTERNAL;
/*use merge MV as starting MV*/
Mv mergeMv[] = { pu.mv[REF_PIC_LIST_0] , pu.mv[REF_PIC_LIST_1] };
m_biLinearBufStride = (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION));
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
int dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
int dx = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH);
/*L0 Padding*/
m_cYuvRefBuffDMVRL0 = (pu.chromaFormat == CHROMA_400 ?
PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y())) :
PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y()),
PelBuf(m_cRefSamplesDMVRL0[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL0[2], pcYuvDst.Cr())));
xPrefetchPad(pu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0);
/*L1 Padding*/
m_cYuvRefBuffDMVRL1 = (pu.chromaFormat == CHROMA_400 ?
PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y())) :
PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y()), PelBuf(m_cRefSamplesDMVRL1[1], pcYuvDst.Cb()),
PelBuf(m_cRefSamplesDMVRL1[2], pcYuvDst.Cr())));
xPrefetchPad(pu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1);
xinitMC(pu, clpRngs);
// point mc buffer to cetre point to avoid multiplication to reach each iteration to the begining
Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
Position puPos = pu.lumaPos();
int bd = pu.cs->slice->getClpRngs().comp[COMPONENT_Y].bd;
{
int num = 0;
int yStart = 0;
for (int y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
{
for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
uint64_t minCost = MAX_UINT64;
bool notZeroCost = true;
int16_t totalDeltaMV[2] = { 0,0 };
int16_t deltaMV[2] = { 0, 0 };
uint64_t *pSADsArray;
for (int i = 0; i < (((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)); i++)
{
m_SADsArray[i] = MAX_UINT64;
}
pSADsArray = &m_SADsArray[(((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)) >> 1];
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
Pel *addrL0Centre = biLinearPredL0 + yStart * m_biLinearBufStride + xStart;
Pel *addrL1Centre = biLinearPredL1 + yStart * m_biLinearBufStride + xStart;
for (int i = 0; i < iterationCount; i++)
{
deltaMV[0] = 0;
deltaMV[1] = 0;
Pel *addrL0 = addrL0Centre + totalDeltaMV[0] + (totalDeltaMV[1] * m_biLinearBufStride);
Pel *addrL1 = addrL1Centre - totalDeltaMV[0] - (totalDeltaMV[1] * m_biLinearBufStride);
if (i == 0)
{
minCost = xDMVRCost(clpRngs.comp[COMPONENT_Y].bd, addrL0, m_biLinearBufStride, addrL1, m_biLinearBufStride, dx, dy);
if (minCost < ((4 * dx * (dy >> 1/*for alternate line*/))))
{
notZeroCost = false;
break;
}
pSADsArray[0] = minCost;
}
if (!minCost)
{
notZeroCost = false;
break;
}
xBIPMVRefine(bd, addrL0, addrL1, minCost, deltaMV, pSADsArray, dx, dy);
if (deltaMV[0] == 0 && deltaMV[1] == 0)
{
break;
}
totalDeltaMV[0] += deltaMV[0];
totalDeltaMV[1] += deltaMV[1];
pSADsArray += ((deltaMV[1] * (((2 * DMVR_NUM_ITERATION) + 1))) + deltaMV[0]);
}
totalDeltaMV[0] = (totalDeltaMV[0] << mvShift);
totalDeltaMV[1] = (totalDeltaMV[1] << mvShift);
xDMVRSubPixelErrorSurface(notZeroCost, totalDeltaMV, deltaMV, pSADsArray);
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
pu.mvdL0SubPu[num] = Mv(totalDeltaMV[0], totalDeltaMV[1]);
num++;
}
}
}
{
PredictionUnit subPu = pu;
subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(puPos.x, puPos.y, dx, dy)));
PelUnitBuf m_cYuvRefBuffSubCuDMVRL0;
PelUnitBuf m_cYuvRefBuffSubCuDMVRL1;
PelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ?
PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y())) :
PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[0][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvDst.Cr())));
PelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ?
PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y())) :
PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[1][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvDst.Cr())));
srcPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu));
srcPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu));
PelUnitBuf subPredBuf = pcYuvDst.subBuf(UnitAreaRelative(pu, subPu));
int x = 0, y = 0;
int xStart = 0, yStart = 0;
int dstStride[MAX_NUM_COMPONENT] = { pcYuvDst.bufs[COMPONENT_Y].stride, pcYuvDst.bufs[COMPONENT_Cb].stride, pcYuvDst.bufs[COMPONENT_Cr].stride };
for (y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
{
for (x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
{
subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy)));
subPu.mv[0] = mergeMv[REF_PIC_LIST_0] + pu.mvdL0SubPu[num];
subPu.mv[1] = mergeMv[REF_PIC_LIST_1] - pu.mvdL0SubPu[num];
#if JVET_N0334_MVCLIPPING
subPu.mv[0].clipToStorageBitDepth();
subPu.mv[1].clipToStorageBitDepth();
#endif
m_cYuvRefBuffSubCuDMVRL0 = m_cYuvRefBuffDMVRL0.subBuf(UnitAreaRelative(pu, subPu));
m_cYuvRefBuffSubCuDMVRL1 = m_cYuvRefBuffDMVRL1.subBuf(UnitAreaRelative(pu, subPu));
xFinalPaddedMCForDMVR(subPu, srcPred0, srcPred1, m_cYuvRefBuffSubCuDMVRL0, m_cYuvRefBuffSubCuDMVRL1, bioApplied, mergeMv);
subPredBuf.bufs[COMPONENT_Y].buf = pcYuvDst.bufs[COMPONENT_Y].buf + xStart + yStart * dstStride[COMPONENT_Y];
subPredBuf.bufs[COMPONENT_Cb].buf = pcYuvDst.bufs[COMPONENT_Cb].buf + (xStart >> 1) + ((yStart >> 1) * dstStride[COMPONENT_Cb]);
subPredBuf.bufs[COMPONENT_Cr].buf = pcYuvDst.bufs[COMPONENT_Cr].buf + (xStart >> 1) + ((yStart >> 1) * dstStride[COMPONENT_Cr]);
xWeightedAverage(subPu, srcPred0, srcPred1, subPredBuf, subPu.cu->slice->getSPS()->getBitDepths(), subPu.cu->slice->clpRngs(), bioApplied);
num++;
}
}
}
}

Karsten Suehring
committed
#if JVET_J0090_MEMORY_BANDWITH_MEASURE
void InterPrediction::cacheAssign( CacheModel *cache )
{
m_cacheModel = cache;
m_if.cacheAssign( cache );
m_if.initInterpolationFilter( !cache->isCacheEnable() );
}
#endif
//! \}