Newer
Older
Pel* gradX0 = m_gradX0;
Pel* gradX1 = m_gradX1;
Pel* gradY0 = m_gradY0;
Pel* gradY1 = m_gradY1;
int stridePredMC = widthG + 2;
const Pel* srcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + stridePredMC + 1;
const Pel* srcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + stridePredMC + 1;
const int src0Stride = stridePredMC;
const int src1Stride = stridePredMC;
Pel* dstY = yuvDst.Y().buf;
const int dstStride = yuvDst.Y().stride;
const Pel* srcY0Temp = srcY0;
const Pel* srcY1Temp = srcY1;
for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
{
Pel* dstTempPtr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + stridePredMC + 1;
Pel* gradY = (refList == 0) ? m_gradY0 : m_gradY1;
Pel* gradX = (refList == 0) ? m_gradX0 : m_gradX1;
#if JVET_M0063_BDOF_FIX
xBioGradFilter(dstTempPtr, stridePredMC, widthG, heightG, widthG, gradX, gradY, clipBitDepths.recon[toChannelType(COMPONENT_Y)]);
#else
xBioGradFilter(dstTempPtr, stridePredMC, widthG, heightG, widthG, gradX, gradY);
Pel* padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 2;
for (int y = 0; y< height; y++)
padStr[-1] = padStr[0];
padStr[width] = padStr[width - 1];
padStr += stridePredMC;
padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 1;
::memcpy(padStr - stridePredMC, padStr, sizeof(Pel)*(widthG));
::memcpy(padStr + height*stridePredMC, padStr + (height - 1)*stridePredMC, sizeof(Pel)*(widthG));
}
const ClpRng& clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y);
const int bitDepth = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
const int shiftNum = IF_INTERNAL_PREC + 1 - bitDepth;
const int offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
#if JVET_M0063_BDOF_FIX
const int limit = (bitDepth>12)? 2 : ((int)1 << (4 + IF_INTERNAL_PREC - bitDepth - 5));
#else
const int limit = ((int)1 << (4 + IF_INTERNAL_PREC - bitDepth - 5));

Karsten Suehring
committed
int* dotProductTemp1 = m_dotProduct1;
int* dotProductTemp2 = m_dotProduct2;
int* dotProductTemp3 = m_dotProduct3;
int* dotProductTemp5 = m_dotProduct5;
int* dotProductTemp6 = m_dotProduct6;
#if JVET_M0063_BDOF_FIX
xCalcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, widthG, widthG, heightG, bitDepth);
#else
xCalcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, widthG, widthG, heightG);
int xUnit = (width >> 2);
int yUnit = (height >> 2);
Pel *dstY0 = dstY;
gradX0 = m_gradX0; gradX1 = m_gradX1;
gradY0 = m_gradY0; gradY1 = m_gradY1;
for (int yu = 0; yu < yUnit; yu++)
{
for (int xu = 0; xu < xUnit; xu++)
{
if (m_bioPredSubBlkDist[yu*xUnit + xu] < m_bioSubBlkDistThres)
{
srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src1Stride + xu) << 2);
dstY0 = dstY + ((yu*dstStride + xu) << 2);
PelBuf dstPelBuf(dstY0, dstStride, Size(4, 4));
dstPelBuf.addAvg(CPelBuf(srcY0Temp, src0Stride, Size(4, 4)), CPelBuf(srcY1Temp, src1Stride, Size(4, 4)), clpRng);
continue;
}
int sGxdI = 0, sGydI = 0, sGxGy = 0, sGx2 = 0, sGy2 = 0;
int tmpx = 0, tmpy = 0;

Karsten Suehring
committed
dotProductTemp1 = m_dotProduct1 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp2 = m_dotProduct2 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp3 = m_dotProduct3 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp5 = m_dotProduct5 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp6 = m_dotProduct6 + offsetPos + ((yu*widthG + xu) << 2);
xCalcBlkGradient(xu << 2, yu << 2, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, sGx2, sGy2, sGxGy, sGxdI, sGydI, widthG, heightG, (1 << 2));
if (sGx2 > 0)
{
tmpx = rightShiftMSB(sGxdI << 3, sGx2);
tmpx = Clip3(-limit, limit, tmpx);
}
if (sGy2 > 0)
{
int mainsGxGy = sGxGy >> 12;
int secsGxGy = sGxGy & ((1 << 12) - 1);
int tmpData = tmpx * mainsGxGy;
tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1;
tmpy = rightShiftMSB(((sGydI << 3) - tmpData), sGy2);
tmpy = Clip3(-limit, limit, tmpy);
}
srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
gradX0 = m_gradX0 + offsetPos + ((yu*widthG + xu) << 2);
gradX1 = m_gradX1 + offsetPos + ((yu*widthG + xu) << 2);
gradY0 = m_gradY0 + offsetPos + ((yu*widthG + xu) << 2);
gradY1 = m_gradY1 + offsetPos + ((yu*widthG + xu) << 2);
dstY0 = dstY + ((yu*dstStride + xu) << 2);
xAddBIOAvg4(srcY0Temp, src0Stride, srcY1Temp, src1Stride, dstY0, dstStride, gradX0, gradX1, gradY0, gradY1, widthG, (1 << 2), (1 << 2), (int)tmpx, (int)tmpy, shiftNum, offset, clpRng);
} // xu
} // yu
}
void InterPrediction::bioSampleExtendBilinearFilter(Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int dim, int fracX, int fracY, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng)
{
Pel const* pSrc = NULL;
Pel* pDst = NULL;
int vFilterSize = NTAPS_BILINEAR;
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
int widthTmp = 0;
int heightTmp = 0;
for (int cand = 0; cand < 4; cand++) // top, left, bottom and right
{
if (cand == 0) // top
{
pSrc = src;
pDst = dst;
widthTmp = width;
heightTmp = dim;
}
else if (cand == 1) // left
{
pSrc = src + dim*srcStride;
pDst = dst + dim*dstStride;
widthTmp = dim;
heightTmp = height - 2 * dim;
}
else if (cand == 2) // bottom
{
pSrc = src + (height - dim)*srcStride;
pDst = dst + (height - dim)*dstStride;
widthTmp = width;
heightTmp = dim;
}
else if (cand == 3) // right
{
pSrc = src + dim*srcStride + width - dim;
pDst = dst + dim*dstStride + width - dim;
widthTmp = dim;
heightTmp = height - 2 * dim;
}
if (fracY == 0)
{
m_if.filterHor(COMPONENT_Y, pSrc, srcStride, pDst, dstStride, widthTmp, heightTmp, fracX, isLast, fmt, clpRng, 1);
}
else if (fracX == 0)
{
m_if.filterVer(COMPONENT_Y, pSrc, srcStride, pDst, dstStride, widthTmp, heightTmp, fracY, true, isLast, fmt, clpRng, 1);
}
else
{
PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][COMPONENT_Y], Size(width, height));
tmpBuf.stride = width;
m_if.filterHor(COMPONENT_Y, pSrc - ((vFilterSize >> 1) - 1) * srcStride, srcStride, tmpBuf.buf, tmpBuf.stride, widthTmp, heightTmp + vFilterSize - 1, fracX, false, fmt, clpRng, 1);
JVET_J0090_SET_CACHE_ENABLE( false );
m_if.filterVer(COMPONENT_Y, tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, pDst, dstStride, widthTmp, heightTmp, fracY, false, isLast, fmt, clpRng, 1);
JVET_J0090_SET_CACHE_ENABLE( true );
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
bool InterPrediction::xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const BitDepths &clipBitDepths)
{
const int width = pu.lwidth();
const int height = pu.lheight();
const int clipbd = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(clipbd);
const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
const int xUnit = (width >> 2);
const int yUnit = (height >> 2);
m_bioDistThres = (shift <= 5) ? (((32 << (clipbd - 8))*width*height) >> (5 - shift)) : (((32 << (clipbd - 8))*width*height) << (shift - 5));
m_bioSubBlkDistThres = (shift <= 5) ? (((64 << (clipbd - 8)) << 4) >> (5 - shift)) : (((64 << (clipbd - 8)) << 4) << (shift - 5));
m_bioDistThres >>= distortionShift;
m_bioSubBlkDistThres >>= distortionShift;
DistParam cDistParam;
Distortion dist = 0;
for (int yu = 0, blkIdx = 0; yu < yUnit; yu++)
{
for (int xu = 0; xu < xUnit; xu++, blkIdx++)
{
const Pel* pPred0 = pYuvSrc0 + ((yu*src0Stride + xu) << 2);
const Pel* pPred1 = pYuvSrc1 + ((yu*src1Stride + xu) << 2);
m_pcRdCost->setDistParam(cDistParam, pPred0, pPred1, src0Stride, src1Stride, clipbd, COMPONENT_Y, (1 << 2), (1 << 2), 0, 1, false, true);
m_bioPredSubBlkDist[blkIdx] = cDistParam.distFunc(cDistParam);
dist += m_bioPredSubBlkDist[blkIdx];
}
}
return (dist >= m_bioDistThres);
}
/**
 * Thin wrapper: forwards every argument unchanged to g_pelBufOP.addBIOAvg4.
 * NOTE(review): g_pelBufOP is presumably the table of (possibly SIMD-specialised)
 * sample-buffer kernels; the arithmetic itself lives there - confirm.
 */
void InterPrediction::xAddBIOAvg4(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
{
g_pelBufOP.addBIOAvg4(src0, src0Stride, src1, src1Stride, dst, dstStride, gradX0, gradX1, gradY0, gradY1, gradStride, width, height, tmpx, tmpy, shift, offset, clpRng);
}
#if JVET_M0063_BDOF_FIX
/**
 * Bit-depth-aware variant (JVET_M0063_BDOF_FIX): forwards every argument,
 * including bitDepth, unchanged to g_pelBufOP.bioGradFilter.
 * NOTE(review): presumably writes the horizontal/vertical gradients of pSrc to
 * gradX/gradY - confirm against the kernel implementation.
 */
void InterPrediction::xBioGradFilter(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, int bitDepth)
{
g_pelBufOP.bioGradFilter(pSrc, srcStride, width, height, gradStride, gradX, gradY, bitDepth);
}
/**
 * Bit-depth-aware variant (JVET_M0063_BDOF_FIX): forwards every argument,
 * including bitDepth, unchanged to g_pelBufOP.calcBIOPar.
 * The five dotProductTemp* pointers are output arrays handed to the kernel;
 * what each one holds is defined by the kernel, not by this wrapper.
 */
void InterPrediction::xCalcBIOPar(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth)
{
g_pelBufOP.calcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, gradStride, widthG, heightG, bitDepth);
}
#else
/**
 * Legacy variant (built when JVET_M0063_BDOF_FIX is off): forwards every
 * argument unchanged to g_pelBufOP.bioGradFilter; no bit depth is passed.
 * NOTE(review): presumably writes the horizontal/vertical gradients of pSrc to
 * gradX/gradY - confirm against the kernel implementation.
 */
void InterPrediction::xBioGradFilter(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY)
{
g_pelBufOP.bioGradFilter(pSrc, srcStride, width, height, gradStride, gradX, gradY);
}
/**
 * Legacy variant (built when JVET_M0063_BDOF_FIX is off): forwards every
 * argument unchanged to g_pelBufOP.calcBIOPar; no bit depth is passed.
 * The five dotProductTemp* pointers are output arrays handed to the kernel;
 * what each one holds is defined by the kernel, not by this wrapper.
 */
void InterPrediction::xCalcBIOPar(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG)
{
g_pelBufOP.calcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, gradStride, widthG, heightG);
}
/**
 * Thin wrapper: forwards every argument unchanged to g_pelBufOP.calcBlkGradient.
 * sGx2, sGy2, sGxGy, sGxdI and sGydI are passed by reference and are therefore
 * outputs (or in/out accumulators) of the kernel.
 * NOTE(review): the array parameters are ordered (Gx2, GxGy, GxdI, Gy2, GydI)
 * while the reference parameters are ordered (Gx2, Gy2, GxGy, GxdI, GydI) -
 * callers must match each list positionally.
 */
void InterPrediction::xCalcBlkGradient(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize)
{
g_pelBufOP.calcBlkGradient(sx, sy, arraysGx2, arraysGxGy, arraysGxdI, arraysGy2, arraysGydI, sGx2, sGy2, sGxGy, sGxdI, sGydI, width, height, unitSize);
}
void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied )

Karsten Suehring
committed
{
const int iRefIdx0 = pu.refIdx[0];
const int iRefIdx1 = pu.refIdx[1];
if( iRefIdx0 >= 0 && iRefIdx1 >= 0 )
{
if( pu.cu->GBiIdx != GBI_DEFAULT )
{
CHECK(bioApplied, "GBi is disallowed with BIO");
pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx);
return;
}
if (bioApplied)
const int src0Stride = pu.lwidth() + 2 * BIO_EXTEND_SIZE + 2;
const int src1Stride = pu.lwidth() + 2 * BIO_EXTEND_SIZE + 2;
const Pel* pSrcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + 2 * src0Stride + 2;
const Pel* pSrcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + 2 * src1Stride + 2;
bool bioEnabled = xCalcBiPredSubBlkDist(pu, pSrcY0, src0Stride, pSrcY1, src1Stride, clipBitDepths);
if (bioEnabled)
{
applyBiOptFlow(pu, pcYuvSrc0, pcYuvSrc1, iRefIdx0, iRefIdx1, pcYuvDst, clipBitDepths);
}
else
{
pcYuvDst.bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]);
}
}
pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, bioApplied);

Karsten Suehring
committed
}
else if( iRefIdx0 >= 0 && iRefIdx1 < 0 )
{
if( pu.cu->triangle )
{
pcYuvDst.copyFrom( pcYuvSrc0 );
}
else

Karsten Suehring
committed
pcYuvDst.copyClip( pcYuvSrc0, clpRngs );
}
else if( iRefIdx0 < 0 && iRefIdx1 >= 0 )
{
if( pu.cu->triangle )
{
pcYuvDst.copyFrom( pcYuvSrc1 );
}
else

Karsten Suehring
committed
pcYuvDst.copyClip( pcYuvSrc1, clpRngs );
}
}
void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList

Karsten Suehring
committed
{
if (!luma || !chroma)
{
if (!luma && chroma)
{
xChromaMC(pu, predBuf);
return;
}
else // (luma && !chroma)
{
xPredInterUni(pu, eRefPicList, predBuf, false
, false
, luma, chroma);
return;
}
}
// else, go with regular MC below

Karsten Suehring
committed
CodingStructure &cs = *pu.cs;
const PPS &pps = *cs.pps;
const SliceType sliceType = cs.slice->getSliceType();
if( eRefPicList != REF_PIC_LIST_X )
{
if( ( ( sliceType == P_SLICE && pps.getUseWP() ) || ( sliceType == B_SLICE && pps.getWPBiPred() ) ) )
{
xPredInterUni ( pu, eRefPicList, predBuf, true
, false
, true, true
);

Karsten Suehring
committed
xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, -1, m_maxCompIDToPred );
}
else
{
xPredInterUni( pu, eRefPicList, predBuf, false
, false
, true, true
);

Karsten Suehring
committed
}
}
else
{
if (pu.mergeType != MRG_TYPE_DEFAULT_N && pu.mergeType != MRG_TYPE_IBC)

Karsten Suehring
committed
{
xSubPuMC( pu, predBuf, eRefPicList );
}
else if( xCheckIdenticalMotion( pu ) )
{
xPredInterUni( pu, REF_PIC_LIST_0, predBuf, false
, false
, true, true
);

Karsten Suehring
committed
}
else
{
xPredInterBi( pu, predBuf );
}
}
return;
}
void InterPrediction::motionCompensation( CodingUnit &cu, const RefPicList &eRefPicList

Karsten Suehring
committed
{
for( auto &pu : CU::traversePUs( cu ) )
{
PelUnitBuf predBuf = cu.cs->getPredBuf( pu );
#if JVET_M0147_DMVR
pu.mvRefine = true;
#endif
motionCompensation( pu, predBuf, eRefPicList
, luma, chroma
);
#if JVET_M0147_DMVR
pu.mvRefine = false;
#endif

Karsten Suehring
committed
}
}
void InterPrediction::motionCompensation( PredictionUnit &pu, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/

Karsten Suehring
committed
{
PelUnitBuf predBuf = pu.cs->getPredBuf( pu );
motionCompensation( pu, predBuf, eRefPicList

Karsten Suehring
committed
}
int InterPrediction::rightShiftMSB(int numer, int denom)
{
int d;
int msbIdx = 0;
for (msbIdx = 0; msbIdx<32; msbIdx++)
{
if (denom < ((int)1 << msbIdx))
{
break;
}
}

Karsten Suehring
committed
int shiftIdx = msbIdx - 1;
d = (numer >> shiftIdx);

Karsten Suehring
committed

Karsten Suehring
committed
void InterPrediction::motionCompensation4Triangle( CodingUnit &cu, MergeCtx &triangleMrgCtx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1 )
{
for( auto &pu : CU::traversePUs( cu ) )
{
const UnitArea localUnitArea( cu.cs->area.chromaFormat, Area( 0, 0, pu.lwidth(), pu.lheight() ) );
PelUnitBuf tmpTriangleBuf = m_triangleBuf.getBuf( localUnitArea );
PelUnitBuf predBuf = cu.cs->getPredBuf( pu );
PU::spanMotionInfo( pu );
motionCompensation( pu, tmpTriangleBuf );
PU::spanMotionInfo( pu );
motionCompensation( pu, predBuf );
#if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
weightedTriangleBlk( pu, splitDir, MAX_NUM_CHANNEL_TYPE, predBuf, tmpTriangleBuf, predBuf );
#else
weightedTriangleBlk( pu, PU::getTriangleWeights(pu, triangleMrgCtx, candIdx0, candIdx1), splitDir, MAX_NUM_CHANNEL_TYPE, predBuf, tmpTriangleBuf, predBuf );
#if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
void InterPrediction::weightedTriangleBlk( PredictionUnit &pu, const bool splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
#else
void InterPrediction::weightedTriangleBlk( PredictionUnit &pu, bool weights, const bool splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
#if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
if( channel == CHANNEL_TYPE_LUMA )
{
xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
}
else if( channel == CHANNEL_TYPE_CHROMA )
{
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
}
else
{
xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
}
#else
xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, weights, predDst, predSrc0, predSrc1 );
else if( channel == CHANNEL_TYPE_CHROMA )
{
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, weights, predDst, predSrc0, predSrc1 );
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, weights, predDst, predSrc0, predSrc1 );
}
else
{
xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, weights, predDst, predSrc0, predSrc1 );
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, weights, predDst, predSrc0, predSrc1 );
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, weights, predDst, predSrc0, predSrc1 );
}
#if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
void InterPrediction::xWeightedTriangleBlk( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
#else
void InterPrediction::xWeightedTriangleBlk( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, const bool weights, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
{
Pel* dst = predDst .get(compIdx).buf;
Pel* src0 = predSrc0.get(compIdx).buf;
Pel* src1 = predSrc1.get(compIdx).buf;
int32_t strideDst = predDst .get(compIdx).stride - width;
int32_t strideSrc0 = predSrc0.get(compIdx).stride - width;
int32_t strideSrc1 = predSrc1.get(compIdx).stride - width;
const char log2WeightBase = 3;
const ClpRng clipRng = pu.cu->slice->clpRngs().comp[compIdx];
const int32_t clipbd = clipRng.bd;
const int32_t shiftDefault = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
const int32_t offsetDefault = (1<<(shiftDefault-1)) + IF_INTERNAL_OFFS;
const int32_t shiftWeighted = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase;
const int32_t offsetWeighted = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase);
const int32_t ratioWH = (width > height) ? (width / height) : 1;
const int32_t ratioHW = (width > height) ? 1 : (height / width);
#if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
const bool longWeight = (compIdx == COMPONENT_Y) || ( predDst.chromaFormat == CHROMA_444 );
const int32_t weightedLength = longWeight ? 7 : 3;
const Pel* pelWeighted = (compIdx == COMPONENT_Y) ? g_trianglePelWeightedLuma[splitDir][weights] : g_trianglePelWeightedChroma[predDst.chromaFormat == CHROMA_444 ? 0 : 1][splitDir][weights];
const int32_t weightedLength = (compIdx == COMPONENT_Y) ? g_triangleWeightLengthLuma[weights] : g_triangleWeightLengthChroma[predDst.chromaFormat == CHROMA_444 ? 0 : 1][weights];
int32_t weightedStartPos = ( splitDir == 0 ) ? ( 0 - (weightedLength >> 1) * ratioWH ) : ( width - ((weightedLength + 1) >> 1) * ratioWH );
int32_t weightedEndPos = weightedStartPos + weightedLength * ratioWH - 1;
int32_t weightedPosoffset =( splitDir == 0 ) ? ratioWH : -ratioWH;
#if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
Pel tmpPelWeighted;
int32_t weightIdx;
#else
int32_t x, y, tmpX, tmpY, tmpWeightedStart, tmpWeightedEnd;
for( y = 0; y < height; y+= ratioHW )
*dst++ = ClipPel( rightShift( (splitDir == 0 ? *src1 : *src0) + offsetDefault, shiftDefault), clipRng );
src0++;
src1++;
tmpWeightedStart = std::max((int32_t)0, weightedStartPos);
tmpWeightedEnd = std::min(weightedEndPos, (int32_t)(width - 1));
#if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
weightIdx = 1;
#else
#if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
weightIdx += abs(weightedStartPos) / ratioWH;
#else
tmpPelWeighted += abs(weightedStartPos) / ratioWH;
for( x = tmpWeightedStart; x <= tmpWeightedEnd; x+= ratioWH )
#if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
tmpPelWeighted = Clip3( 1, 7, longWeight ? weightIdx : (weightIdx * 2));
tmpPelWeighted = splitDir ? ( 8 - tmpPelWeighted ) : tmpPelWeighted;
*dst++ = ClipPel( rightShift( (tmpPelWeighted*(*src0++) + ((8 - tmpPelWeighted) * (*src1++)) + offsetWeighted), shiftWeighted ), clipRng );
#else
*dst++ = ClipPel( rightShift( ((*tmpPelWeighted)*(*src0++) + ((8 - (*tmpPelWeighted)) * (*src1++)) + offsetWeighted), shiftWeighted ), clipRng );
#if JVET_M0328_KEEP_ONE_WEIGHT_GROUP
weightIdx ++;
#else
*dst++ = ClipPel( rightShift( (splitDir == 0 ? *src0 : *src1) + offsetDefault, shiftDefault ), clipRng );
src0++;
src1++;
dst += strideDst;
src0 += strideSrc0;
src1 += strideSrc1;
weightedStartPos += weightedPosoffset;
weightedEndPos += weightedPosoffset;
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
#if JVET_M0147_DMVR
// All-ones 64-bit value. xBIPMVRefine compares SAD-array entries against it and
// only computes a cost for entries that still hold this value, i.e. it acts as
// the "not evaluated yet" marker.
const uint64_t MAX_UINT64 = 0xFFFFFFFFFFFFFFFFU;
/**
 * Copies, for every colour component, the reference samples needed around the
 * PU's (clipped) motion vector into pcPad and then pads the copied region.
 *
 * \param pu     prediction unit providing MV, reference index and geometry
 * \param pcPad  destination buffers; the stride of each component buffer is overwritten here
 * \param refId  reference picture list the MV belongs to
 *
 * NOTE(review): the same shift (MV precision + horizontal chroma scale) is used
 * for both MV components; for chroma formats whose vertical scale differs from
 * the horizontal one this looks suspicious - confirm.
 */
void InterPrediction::xPrefetchPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId)
{
  const Picture* refPic  = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
  const int      mvshift = (MV_FRACTIONAL_BITS_INTERNAL);

  for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
  {
    PelBuf &padBuf = pcPad.bufs[compID];
    padBuf.stride  = (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1) + NTAPS_LUMA);

    const int filtersize  = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;
    const int tapsBefore  = (filtersize >> 1) - 1;
    // Leave DMVR_NUM_ITERATION rows and columns free in front of the copied data.
    const int offset      = (DMVR_NUM_ITERATION) * (padBuf.stride + 1);
    const int padsize     = (DMVR_NUM_ITERATION) >> getComponentScaleX((ComponentID)compID, pu.chromaFormat);
    const int mvshiftTemp = mvshift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
    // Block size enlarged by the interpolation filter support.
    const int width       = padBuf.width + (filtersize - 1);
    const int height      = padBuf.height + (filtersize - 1);

    // Move the MV to the first sample the interpolation filter touches, then clip it.
    Mv cMv(pu.mv[refId].getHor(), pu.mv[refId].getVer());
    cMv += Mv(-(tapsBefore << mvshiftTemp),
              -(tapsBefore << mvshiftTemp));
    clipMv(cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);

    /* Pre-fetch similar to HEVC */
    const Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp);
    const CPelBuf  refBuf     = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, Rec_offset, pu.blocks[compID].size()));
    g_pelBufOP.copyBuffer((Pel *)refBuf.buf, refBuf.stride, ((Pel *)padBuf.buf) + offset, padBuf.stride, width, height);

    /* padding on all sides, padsize samples wide */
    g_pelBufOP.padding(padBuf.buf + offset, padBuf.stride, width, height, padsize);
  }
}
/**
 * Three-bit restoring division: returns sign(N) * min(7, floor(|N| / (2*D))).
 *
 * |N| is compared against 8*D, 4*D and 2*D in turn; every successful compare
 * contributes one quotient bit (MSB first). Only the first two compares
 * subtract from the running remainder, the last one just sets the LSB.
 *
 * \param N  numerator (may be negative)
 * \param D  denominator (callers in this file pass a non-zero value)
 * \return quotient in the range [-7, 7]
 */
inline int32_t div_for_maxq7(int64_t N, int64_t D)
{
  const bool negative  = (N < 0);
  int64_t    remainder = negative ? -N : N;
  int64_t    divisor   = (D << 3);
  int32_t    quotient  = 0;

  // Two subtracting steps: against 8*D, then 4*D.
  for (int step = 0; step < 2; ++step)
  {
    if (remainder >= divisor)
    {
      remainder -= divisor;
      quotient++;
    }
    quotient <<= 1;
    divisor  >>= 1;
  }

  // Final bit: compare against 2*D, no subtraction needed any more.
  if (remainder >= divisor)
  {
    quotient++;
  }

  return negative ? -quotient : quotient;
}
/**
 * Derives a sub-sample MV offset per axis from five cost values.
 *
 * sadBuffer[0] is the centre cost; sadBuffer[1]/sadBuffer[3] are the pair used
 * for deltaMv[0] and sadBuffer[2]/sadBuffer[4] the pair used for deltaMv[1].
 * For each axis:
 *   - if first + second - 2*centre is zero, deltaMv for that axis is left untouched;
 *   - if neither neighbour equals the centre, deltaMv = div_for_maxq7(numerator, denominator);
 *   - otherwise deltaMv is -8 when the first neighbour equals the centre, else +8 (half pel).
 *
 * \param sadBuffer  five costs as described above
 * \param deltaMv    two-element output, one entry per axis
 */
void xSubPelErrorSrfc(uint64_t *sadBuffer, int32_t *deltaMv)
{
  const int32_t subPelLevel = 4; /*1: half pel, 2: Qpel, 3:1/8, 4: 1/16*/

  // axis 0: horizontal (entries 1 and 3), axis 1: vertical (entries 2 and 4)
  for (int axis = 0; axis < 2; ++axis)
  {
    const uint64_t centre = sadBuffer[0];
    const uint64_t first  = sadBuffer[1 + axis];
    const uint64_t second = sadBuffer[3 + axis];

    const int64_t numerator   = (int64_t)((first - second) << subPelLevel);
    const int64_t denominator = (int64_t)((first + second - (centre << 1)));
    if (0 == denominator)
    {
      continue;
    }

    if ((first != centre) && (second != centre))
    {
      deltaMv[axis] = div_for_maxq7(numerator, denominator);
    }
    else
    {
      deltaMv[axis] = (first == centre) ? -8 : 8; // half pel
    }
  }
}
void InterPrediction::xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& minCost, int16_t *delta_mv, uint64_t *pSADsArray, int width, int height)
{
const int32_t refStrideL0 = m_biLinearBufStride;
const int32_t refStrideL1 = m_biLinearBufStride;
Pel *pRefL0Orig = pRefL0;
Pel *pRefL1Orig = pRefL1;
for (int nIdx = SAD_BOTTOM; nIdx <= SAD_TOP_LEFT; ++nIdx)
{
int32_t SadOffset = ((m_pSearchOffset[nIdx].getVer() * ((DMVR_NUM_ITERATION << 1) + 1)) + m_pSearchOffset[nIdx].getHor());
pRefL0 = pRefL0Orig + m_pSearchOffset[nIdx].hor + (m_pSearchOffset[nIdx].ver * refStrideL0);
pRefL1 = pRefL1Orig - m_pSearchOffset[nIdx].hor - (m_pSearchOffset[nIdx].ver * refStrideL1);
if (*(pSADsArray + SadOffset) == MAX_UINT64)
{
const uint64_t cost = xDMVRCost(bd, pRefL0, refStrideL0, pRefL1, refStrideL1, width, height);
*(pSADsArray + SadOffset) = cost;
}
if (nIdx == SAD_LEFT)
{
int32_t down = -1, right = -1;
if (pSADsArray[(((DMVR_NUM_ITERATION << 1) + 1))] < pSADsArray[-(((DMVR_NUM_ITERATION << 1) + 1))])
{
down = 1;
}
if (pSADsArray[1] < pSADsArray[-1])
{
right = 1;
}
m_pSearchOffset[SAD_TOP_LEFT].set(right, down);
}
if (*(pSADsArray + SadOffset) < minCost)
{
minCost = *(pSADsArray + SadOffset);
delta_mv[0] = m_pSearchOffset[nIdx].getHor();
delta_mv[1] = m_pSearchOffset[nIdx].getVer();
}
}
}
/**
 * Final motion compensation after DMVR: predicts every component of both
 * reference lists from the pre-fetched, padded reference buffers.
 *
 * For each list the refined MV (pu.mv) and the original merge MV are clipped,
 * their integer-sample difference is used to locate the start position inside
 * the padded buffer, and xPredInterBlk is run from that position.
 *
 * \param pu           prediction unit holding the refined MVs
 * \param pcYuvSrc0    destination of the list-0 prediction
 * \param pcYuvSrc1    destination of the list-1 prediction
 * \param pcPad0       padded reference samples for list 0
 * \param pcPad1       padded reference samples for list 1
 * \param bBIOApplied  forwarded to xPredInterBlk
 * \param mergeMV      MVs before refinement, one per list
 *
 * Side effect: m_iRefListIdx is set to the list being processed.
 */
void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bBIOApplied
  , const Mv mergeMV[NUM_REF_PIC_LIST_01]
)
{
  int offset, deltaIntMvX, deltaIntMvY;
  PelUnitBuf pcYUVTemp = pcYuvSrc0;
  PelUnitBuf pcPadTemp = pcPad0;
  /*always high precision MVs are used*/
  // Use the named constant instead of a literal 4 so this stays in step with
  // xPrefetchPad and xProcessDMVR, which derive the same shift from it.
  const int mvshift = MV_FRACTIONAL_BITS_INTERNAL;

  for (int k = 0; k < NUM_REF_PIC_LIST_01; k++)
  {
    RefPicList refId = (RefPicList)k;
    Mv cMv = pu.mv[refId];
    m_iRefListIdx = refId;
    const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
    clipMv(cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);

    Mv startMv = mergeMV[refId];
    clipMv(startMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);

    for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
    {
      const int mvshiftTemp = mvshift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
      // Samples the interpolation filter needs to the left of / above the block.
      const int leftPixelExtra = (compID == COMPONENT_Y) ? ((NTAPS_LUMA >> 1) - 1) : ((NTAPS_CHROMA >> 1) - 1);

      // Integer-sample displacement introduced by the refinement.
      deltaIntMvX = (cMv.getHor() >> mvshiftTemp) -
        (startMv.getHor() >> mvshiftTemp);
      deltaIntMvY = (cMv.getVer() >> mvshiftTemp) -
        (startMv.getVer() >> mvshiftTemp);

      CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement");

      // Start inside the padded buffer: skip the padding and filter margin, then apply the displacement.
      offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (pcPadTemp.bufs[compID].stride + 1);
      offset += (deltaIntMvY)* pcPadTemp.bufs[compID].stride;
      offset += (deltaIntMvX);

      PelBuf &srcBuf = pcPadTemp.bufs[compID];
      xPredInterBlk((ComponentID)compID, pu, refPic, cMv, pcYUVTemp, true, pu.cs->slice->getClpRngs().comp[compID],
        bBIOApplied, false, 0, 0, 0, (srcBuf.buf + offset), pcPadTemp.bufs[compID].stride);
    }

    // Second pass works on the list-1 buffers.
    pcYUVTemp = pcYuvSrc1;
    pcPadTemp = pcPad1;
  }
}
/**
 * Computes the distortion between two luma blocks via m_pcRdCost.
 *
 * Each stride parameter directly follows the buffer it describes, which is how
 * xBIPMVRefine calls this function. The previous parameter naming was swapped,
 * so pOrg was paired with the stride passed for pRef and vice versa; that was
 * harmless only while both strides happened to be equal.
 *
 * \param iBitDepth    bit depth handed to the distortion setup
 * \param pOrg         first block
 * \param uiOrgStride  stride of pOrg
 * \param pRef         second block
 * \param uiRefStride  stride of pRef
 * \param iWidth       block width
 * \param iHeight      block height
 * \return value returned by the selected distortion function
 */
uint64_t InterPrediction::xDMVRCost(int iBitDepth, Pel* pOrg, uint32_t uiOrgStride, const Pel* pRef, uint32_t uiRefStride, int iWidth, int iHeight)
{
  DistParam cDistParam;
  cDistParam.applyWeight = false;
  cDistParam.useMR = false;
  m_pcRdCost->setDistParam(cDistParam, pOrg, pRef, uiOrgStride, uiRefStride, iBitDepth, COMPONENT_Y, iWidth, iHeight , 1);
  uint64_t uiCost = cDistParam.distFunc(cDistParam);
  return uiCost;
}
/**
 * Adds a sub-sample correction to total_delta_mv when the last search step
 * stayed at the centre (delta_mv == (0, 0)) and notZeroCost is set.
 *
 * The centre cost and its left, top, right and bottom neighbours are gathered
 * from pSADsArray (row pitch 2*DMVR_NUM_ITERATION + 1) and handed to
 * xSubPelErrorSrfc; the resulting offsets are accumulated into total_delta_mv.
 *
 * \param notZeroCost     gate: nothing happens when false
 * \param total_delta_mv  in/out accumulated MV offset (hor, ver)
 * \param delta_mv        offset chosen by the last integer search step
 * \param pSADsArray      pointer to the centre entry of the cost array
 */
void xDMVRSubPixelErrorSurface(bool notZeroCost, int16_t *total_delta_mv, int16_t *delta_mv, uint64_t *pSADsArray)
{
  if (!notZeroCost)
  {
    return;
  }
  if (delta_mv[0] != 0 || delta_mv[1] != 0)
  {
    return;
  }

  const int rowPitch = (((DMVR_NUM_ITERATION << 1) + 1));

  // Order expected by xSubPelErrorSrfc: centre, left, top, right, bottom.
  uint64_t crossCosts[5] = { pSADsArray[0], pSADsArray[-1], pSADsArray[-rowPitch], pSADsArray[1], pSADsArray[rowPitch] };
  int32_t  subPelDelta[2] = { 0, 0 };

  xSubPelErrorSrfc(crossCosts, subPelDelta);

  total_delta_mv[0] += subPelDelta[0];
  total_delta_mv[1] += subPelDelta[1];
}
/**
 * Produces the initial luma predictions used by the DMVR search.
 *
 * The PU's MVs of both lists are copied and clipped, then for each list a luma
 * block enlarged by DMVR_NUM_ITERATION samples on every side is predicted from
 * the padded reference buffer (m_cYuvRefBuffDMVRL0 / L1) into the temporary
 * prediction storage (m_cYuvPredTempDMVRL0 / L1).
 *
 * \param pu       prediction unit providing MVs, reference indices and size
 * \param clpRngs  clipping ranges; the luma range is forwarded to xPredInterBlk
 */
void InterPrediction::xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs)
{
  const int iRefIdx0 = pu.refIdx[0];
  const int iRefIdx1 = pu.refIdx[1];

  /*use merge MV as starting MV*/
  Mv StartingMVL0(pu.mv[REF_PIC_LIST_0]);
  Mv StartingMVL1(pu.mv[REF_PIC_LIST_1]);

  /*Clip the starting MVs*/
  clipMv(StartingMVL0, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
  clipMv(StartingMVL1, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);

  // Shared recipe for both lists: locate the start sample in the padded
  // reference and predict the enlarged luma block into predStorage.
  auto predictEnlargedLuma = [&](PelUnitBuf &paddedRef, Pel *predStorage, RefPicList list, int refIdx, Mv &startMv)
  {
    const int leftPixelExtra = (NTAPS_LUMA >> 1) - 1;

    int offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (paddedRef.bufs[COMPONENT_Y].stride + 1);
    offset += (-(int)DMVR_NUM_ITERATION)* (int)paddedRef.bufs[COMPONENT_Y].stride;
    offset += (-(int)DMVR_NUM_ITERATION);

    PelBuf srcBuf = paddedRef.bufs[COMPONENT_Y];
    PelUnitBuf predDst = PelUnitBuf(pu.chromaFormat, PelBuf(predStorage,
      (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1)), pu.lwidth() + (DMVR_NUM_ITERATION << 1), pu.lheight() + (DMVR_NUM_ITERATION << 1)));

    xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(list, refIdx), startMv, predDst, true, clpRngs.comp[COMPONENT_Y],
      false, false, pu.lwidth() + (DMVR_NUM_ITERATION << 1), pu.lheight() + (DMVR_NUM_ITERATION << 1), true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride
    );
  };

  /*L0 MC for refinement*/
  predictEnlargedLuma(m_cYuvRefBuffDMVRL0, m_cYuvPredTempDMVRL0, REF_PIC_LIST_0, iRefIdx0, StartingMVL0);

  /*L1 MC for refinement*/
  predictEnlargedLuma(m_cYuvRefBuffDMVRL1, m_cYuvPredTempDMVRL1, REF_PIC_LIST_1, iRefIdx1, StartingMVL1);
}
void InterPrediction::xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, const ClpRngs &clpRngs, const bool bBIOApplied)
{
bool bDMVRApplied = true;
int iterationCount = DMVR_NUM_ITERATION;
/*Always High Precision*/
int mvShift = MV_FRACTIONAL_BITS_INTERNAL;
/*use merge MV as starting MV*/
Mv mergeMv[] = { pu.mv[REF_PIC_LIST_0] , pu.mv[REF_PIC_LIST_1] };
m_biLinearBufStride = (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1));
int dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
int dx = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH);
/*L0 Padding*/
m_cYuvRefBuffDMVRL0 = (pu.chromaFormat == CHROMA_400 ?
PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y())) :
PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y()),
PelBuf(m_cRefSamplesDMVRL0[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL0[2], pcYuvDst.Cr())));
xPrefetchPad(pu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0);
/*L1 Padding*/
m_cYuvRefBuffDMVRL1 = (pu.chromaFormat == CHROMA_400 ?
PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y())) :
PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y()), PelBuf(m_cRefSamplesDMVRL1[1], pcYuvDst.Cb()),
PelBuf(m_cRefSamplesDMVRL1[2], pcYuvDst.Cr())));
xPrefetchPad(pu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1);
xinitMC(pu, clpRngs);
// point the MC buffers at the centre point to avoid a multiplication from the beginning of the buffer in each iteration
Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + (iterationCount * m_biLinearBufStride) + iterationCount;
Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + (iterationCount * m_biLinearBufStride) + iterationCount;
Position puPos = pu.lumaPos();
int bd = pu.cs->slice->getClpRngs().comp[COMPONENT_Y].bd;
if (bDMVRApplied)
{
int num = 0;
int yStart = 0;
for (int y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
{
for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
{
uint64_t minCost = MAX_UINT64;
bool notZeroCost = true;
int16_t totalDeltaMV[2] = { 0,0 };
int16_t deltaMV[2] = { 0, 0 };
uint64_t *pSADsArray;
for (int i = 0; i < (((DMVR_NUM_ITERATION << 1) + 1) * ((DMVR_NUM_ITERATION << 1) + 1)); i++)
{
m_SADsArray[i] = MAX_UINT64;
}
pSADsArray = &m_SADsArray[(((DMVR_NUM_ITERATION << 1) + 1) * ((DMVR_NUM_ITERATION << 1) + 1)) >> 1];
Pel *addrL0Centre = biLinearPredL0 + yStart * m_biLinearBufStride + xStart;
Pel *addrL1Centre = biLinearPredL1 + yStart * m_biLinearBufStride + xStart;
for (int i = 0; i < iterationCount; i++)
{
deltaMV[0] = 0;
deltaMV[1] = 0;
Pel *addrL0 = addrL0Centre + totalDeltaMV[0] + (totalDeltaMV[1] * m_biLinearBufStride);
Pel *addrL1 = addrL1Centre - totalDeltaMV[0] - (totalDeltaMV[1] * m_biLinearBufStride);
if (i == 0)
{
minCost = xDMVRCost(clpRngs.comp[COMPONENT_Y].bd, addrL0, m_biLinearBufStride, addrL1, m_biLinearBufStride, dx, dy);
if (minCost < ((4 * dx * (dy >> 1/*for alternate line*/))))
{
notZeroCost = false;
break;
}
pSADsArray[0] = minCost;
}
if (!minCost)
{
notZeroCost = false;
break;
}
xBIPMVRefine(bd, addrL0, addrL1, minCost, deltaMV, pSADsArray, dx, dy);
if (deltaMV[0] == 0 && deltaMV[1] == 0)
{
break;
}
totalDeltaMV[0] += deltaMV[0];
totalDeltaMV[1] += deltaMV[1];
pSADsArray += ((deltaMV[1] * (((DMVR_NUM_ITERATION << 1) + 1))) + deltaMV[0]);
}
totalDeltaMV[0] = (totalDeltaMV[0] << mvShift);
totalDeltaMV[1] = (totalDeltaMV[1] << mvShift);
xDMVRSubPixelErrorSurface(notZeroCost, totalDeltaMV, deltaMV, pSADsArray);
pu.mvdL0SubPu[num] = Mv(totalDeltaMV[0], totalDeltaMV[1]);
num++;
}
}
}
{
PredictionUnit subPu = pu;
subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(puPos.x, puPos.y, dx, dy)));
PelUnitBuf m_cYuvRefBuffSubCuDMVRL0;
PelUnitBuf m_cYuvRefBuffSubCuDMVRL1;
PelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ?
PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y())) :
PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[0][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvDst.Cr())));
PelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ?
PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y())) :
PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[1][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvDst.Cr())));
srcPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu));
srcPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu));
PelUnitBuf subPredBuf = pcYuvDst.subBuf(UnitAreaRelative(pu, subPu));
int x = 0, y = 0;