Newer
Older
int sz = blockWidth * blockHeight;
g_pelBufOP.roundIntVector(dMvScaleHor, sz, shift, dmvLimit);
g_pelBufOP.roundIntVector(dMvScaleVer, sz, shift, dmvLimit);
}
}
#endif

Karsten Suehring
committed
// get prediction block by block
for ( int h = 0; h < cxHeight; h += blockHeight )
{
for ( int w = 0; w < cxWidth; w += blockWidth )
{
if (compID == COMPONENT_Y || pu.chromaFormat == CHROMA_444)
if ( !subblkMVSpreadOverLimit )
{
iMvScaleTmpHor = iMvScaleHor + iDMvHorX * (iHalfBW + w) + iDMvVerX * (iHalfBH + h);
iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (iHalfBW + w) + iDMvVerY * (iHalfBH + h);
}
else
{
iMvScaleTmpHor = iMvScaleHor + iDMvHorX * ( cxWidth >> 1 ) + iDMvVerX * ( cxHeight >> 1 );
iMvScaleTmpVer = iMvScaleVer + iDMvHorY * ( cxWidth >> 1 ) + iDMvVerY * ( cxHeight >> 1 );
}
Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer);
tmpMv.clipToStorageBitDepth();
iMvScaleTmpHor = tmpMv.getHor();
iMvScaleTmpVer = tmpMv.getVer();
if (sps.getWrapAroundEnabledFlag())
{
m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(iMvScaleTmpHor, iMvScaleTmpVer);
Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer);
wrapRef = wrapClipMv( tmpMv, Position(pu.Y().x + w, pu.Y().y + h), Size(blockWidth, blockHeight), &sps);
iMvScaleTmpHor = tmpMv.getHor();
iMvScaleTmpVer = tmpMv.getVer();
}
else
{
m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(iMvScaleTmpHor, iMvScaleTmpVer);
iMvScaleTmpHor = std::min<int>(iHorMax, std::max<int>(iHorMin, iMvScaleTmpHor));
iMvScaleTmpVer = std::min<int>(iVerMax, std::max<int>(iVerMin, iMvScaleTmpVer));
}
Anish Tamse
committed
Mv curMv = m_storedMv[((h << iScaleY) / AFFINE_MIN_BLOCK_SIZE) * MVBUFFER_SIZE + ((w << iScaleX) / AFFINE_MIN_BLOCK_SIZE)] +
m_storedMv[((h << iScaleY) / AFFINE_MIN_BLOCK_SIZE + iScaleY)* MVBUFFER_SIZE + ((w << iScaleX) / AFFINE_MIN_BLOCK_SIZE + iScaleX)];
roundAffineMv(curMv.hor, curMv.ver, 1);
if (sps.getWrapAroundEnabledFlag())
wrapRef = wrapClipMv( curMv, Position(pu.Y().x + (w << iScaleX), pu.Y().y + (h << iScaleY)), Size(blockWidth << iScaleX, blockHeight << iScaleY), &sps);
curMv.hor = std::min<int>(iHorMax, std::max<int>(iHorMin, curMv.hor));
curMv.ver = std::min<int>(iVerMax, std::max<int>(iVerMin, curMv.ver));
}
iMvScaleTmpHor = curMv.hor;
iMvScaleTmpVer = curMv.ver;
}

Karsten Suehring
committed
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
// get the MV in high precision
int xFrac, yFrac, xInt, yInt;
if (!iScaleX)
{
xInt = iMvScaleTmpHor >> 4;
xFrac = iMvScaleTmpHor & 15;
}
else
{
xInt = iMvScaleTmpHor >> 5;
xFrac = iMvScaleTmpHor & 31;
}
if (!iScaleY)
{
yInt = iMvScaleTmpVer >> 4;
yFrac = iMvScaleTmpVer & 15;
}
else
{
yInt = iMvScaleTmpVer >> 5;
yFrac = iMvScaleTmpVer & 31;
}
const CPelBuf refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, pu.blocks[compID].offset(xInt + w, yInt + h), pu.blocks[compID] ), wrapRef );
#if !JVET_O0070_PROF

Karsten Suehring
committed
PelBuf &dstBuf = dstPic.bufs[compID];
#endif
#if JVET_O0070_PROF
Pel* ref = (Pel*) refBuf.buf;
Pel* dst = dstBuf.buf + w + h * dstBuf.stride;
int refStride = refBuf.stride;
int dstStride = dstBuf.stride;
int bw = blockWidth;
int bh = blockHeight;
if (enablePROF)
{
dst = dstExtBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
dstStride = dstExtBuf.stride;
}
#endif

Karsten Suehring
committed
if ( yFrac == 0 )
{
#if JVET_O0070_PROF
m_if.filterHor( compID, (Pel*) ref, refStride, dst, dstStride, bw, bh, xFrac, isLast, chFmt, clpRng);
#else

Karsten Suehring
committed
m_if.filterHor( compID, (Pel*) refBuf.buf, refBuf.stride, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, blockWidth, blockHeight, xFrac, !bi, chFmt, clpRng );
#endif

Karsten Suehring
committed
}
else if ( xFrac == 0 )
{
#if JVET_O0070_PROF
m_if.filterVer( compID, (Pel*) ref, refStride, dst, dstStride, bw, bh, yFrac, true, isLast, chFmt, clpRng);
#else

Karsten Suehring
committed
m_if.filterVer( compID, (Pel*) refBuf.buf, refBuf.stride, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, blockWidth, blockHeight, yFrac, true, !bi, chFmt, clpRng );
#endif

Karsten Suehring
committed
}
else
{
#if JVET_O0070_PROF
m_if.filterHor( compID, (Pel*)ref - ((vFilterSize>>1) -1)*refStride, refStride, tmpBuf.buf, tmpBuf.stride, bw, bh+vFilterSize-1, xFrac, false, chFmt, clpRng);
#else

Karsten Suehring
committed
m_if.filterHor( compID, (Pel*) refBuf.buf - ((vFilterSize>>1) -1)*refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, blockWidth, blockHeight+vFilterSize-1, xFrac, false, chFmt, clpRng);
#endif

Karsten Suehring
committed
JVET_J0090_SET_CACHE_ENABLE( false );
#if JVET_O0070_PROF
m_if.filterVer( compID, tmpBuf.buf + ((vFilterSize>>1) -1)*tmpBuf.stride, tmpBuf.stride, dst, dstStride, bw, bh, yFrac, false, isLast, chFmt, clpRng);
#else

Karsten Suehring
committed
m_if.filterVer( compID, tmpBuf.buf + ((vFilterSize>>1) -1)*tmpBuf.stride, tmpBuf.stride, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, blockWidth, blockHeight, yFrac, false, !bi, chFmt, clpRng);
#endif

Karsten Suehring
committed
JVET_J0090_SET_CACHE_ENABLE( true );
}
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
#if JVET_O0070_PROF
if (enablePROF)
{
const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
const int xOffset = xFrac >> 3;
const int yOffset = yFrac >> 3;
const int refOffset = (blockHeight + 1) * refStride;
const int dstOffset = (blockHeight + 1)* dstStride;
const Pel* refPel = ref - (1 - yOffset) * refStride + xOffset - 1;
Pel* dstPel = dst - dstStride - 1;
for (int pw = 0; pw < blockWidth + 2; pw++)
{
dstPel[pw] = leftShift_round(refPel[pw], shift) - (Pel)IF_INTERNAL_OFFS;
dstPel[pw+dstOffset] = leftShift_round(refPel[pw+refOffset], shift) - (Pel)IF_INTERNAL_OFFS;
}
refPel = ref + yOffset * refBuf.stride + xOffset;
dstPel = dst;
for (int ph = 0; ph < blockHeight; ph++, refPel += refStride, dstPel += dstStride)
{
dstPel[-1] = leftShift_round(refPel[-1], shift) - (Pel)IF_INTERNAL_OFFS;
dstPel[blockWidth] = leftShift_round(refPel[blockWidth], shift) - (Pel)IF_INTERNAL_OFFS;
}
PelBuf gradXBuf = gradXExt.subBuf(w, h, blockWidth + 2, blockHeight + 2);
PelBuf gradYBuf = gradYExt.subBuf(w, h, blockWidth + 2, blockHeight + 2);
g_pelBufOP.profGradFilter(dstExtBuf.buf, dstExtBuf.stride, blockWidth + 2, blockHeight + 2, gradXBuf.stride, gradXBuf.buf, gradYBuf.buf, clpRng.bd);
const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
const Pel offset = (1 << (shiftNum - 1)) + IF_INTERNAL_OFFS;
Pel* src = dstExtBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
Pel* gX = gradXBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
Pel* gY = gradYBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
Pel * dstY = dstBuf.bufAt(w, h);
if (!bi)
{
g_pelBufOP.applyPROF(dstY, dstBuf.stride, src, dstExtBuf.stride, blockWidth, blockHeight, gX, gY, gradXBuf.stride, dMvScaleHor, dMvScaleVer, blockWidth, shiftNum, offset, clpRng);
}
else
{
PelBuf srcExtBuf(src, dstExtBuf.stride, Size(blockWidth, blockHeight));
PelBuf destBuf(dstY, dstBuf.stride, Size(blockWidth, blockHeight));
destBuf.copyFrom(srcExtBuf);
}
}
#endif

Karsten Suehring
committed
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
}
}
}
int getMSB( unsigned x )
{
int msb = 0, bits = ( sizeof(int) << 3 ), y = 1;
while( x > 1u )
{
bits >>= 1;
y = x >> bits;
if( y )
{
x = y;
msb += bits;
}
}
msb += y;
return msb;
}
void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &yuvSrc0, const CPelUnitBuf &yuvSrc1, const int &refIdx0, const int &refIdx1, PelUnitBuf &yuvDst, const BitDepths &clipBitDepths)
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
const int height = yuvDst.Y().height;
const int width = yuvDst.Y().width;
int heightG = height + 2 * BIO_EXTEND_SIZE;
int widthG = width + 2 * BIO_EXTEND_SIZE;
int offsetPos = widthG*BIO_EXTEND_SIZE + BIO_EXTEND_SIZE;
Pel* gradX0 = m_gradX0;
Pel* gradX1 = m_gradX1;
Pel* gradY0 = m_gradY0;
Pel* gradY1 = m_gradY1;
int stridePredMC = widthG + 2;
const Pel* srcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + stridePredMC + 1;
const Pel* srcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + stridePredMC + 1;
const int src0Stride = stridePredMC;
const int src1Stride = stridePredMC;
Pel* dstY = yuvDst.Y().buf;
const int dstStride = yuvDst.Y().stride;
const Pel* srcY0Temp = srcY0;
const Pel* srcY1Temp = srcY1;
for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
{
Pel* dstTempPtr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + stridePredMC + 1;
Pel* gradY = (refList == 0) ? m_gradY0 : m_gradY1;
Pel* gradX = (refList == 0) ? m_gradX0 : m_gradX1;
xBioGradFilter(dstTempPtr, stridePredMC, widthG, heightG, widthG, gradX, gradY, clipBitDepths.recon[toChannelType(COMPONENT_Y)]);
Pel* padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 2;
for (int y = 0; y< height; y++)
padStr[-1] = padStr[0];
padStr[width] = padStr[width - 1];
padStr += stridePredMC;
padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 1;
::memcpy(padStr - stridePredMC, padStr, sizeof(Pel)*(widthG));
::memcpy(padStr + height*stridePredMC, padStr + (height - 1)*stridePredMC, sizeof(Pel)*(widthG));
}
const ClpRng& clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y);
const int bitDepth = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
const int shiftNum = IF_INTERNAL_PREC + 1 - bitDepth;
const int offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
const int limit = (1<<(std::max<int>(5, bitDepth - 7)));

Karsten Suehring
committed
int* dotProductTemp1 = m_dotProduct1;
int* dotProductTemp2 = m_dotProduct2;
int* dotProductTemp3 = m_dotProduct3;
int* dotProductTemp5 = m_dotProduct5;
int* dotProductTemp6 = m_dotProduct6;
xCalcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, widthG, widthG, heightG, bitDepth);
int xUnit = (width >> 2);
int yUnit = (height >> 2);
Pel *dstY0 = dstY;
gradX0 = m_gradX0; gradX1 = m_gradX1;
gradY0 = m_gradY0; gradY1 = m_gradY1;
for (int yu = 0; yu < yUnit; yu++)
{
for (int xu = 0; xu < xUnit; xu++)
{
#if !JVET_O0055_INT_DMVR_DIS_BDOF
if (m_bioPredSubBlkDist[yu*xUnit + xu] < m_bioSubBlkDistThres)
{
srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src1Stride + xu) << 2);
dstY0 = dstY + ((yu*dstStride + xu) << 2);
PelBuf dstPelBuf(dstY0, dstStride, Size(4, 4));
dstPelBuf.addAvg(CPelBuf(srcY0Temp, src0Stride, Size(4, 4)), CPelBuf(srcY1Temp, src1Stride, Size(4, 4)), clpRng);
#endif
int sGxdI = 0, sGydI = 0, sGxGy = 0, sGx2 = 0, sGy2 = 0;
int tmpx = 0, tmpy = 0;

Karsten Suehring
committed
dotProductTemp1 = m_dotProduct1 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp2 = m_dotProduct2 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp3 = m_dotProduct3 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp5 = m_dotProduct5 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp6 = m_dotProduct6 + offsetPos + ((yu*widthG + xu) << 2);
xCalcBlkGradient(xu << 2, yu << 2, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, sGx2, sGy2, sGxGy, sGxdI, sGydI, widthG, heightG, (1 << 2));
if (sGx2 > 0)
{
tmpx = rightShiftMSB(sGxdI << 3, sGx2);
tmpx = Clip3(-limit, limit, tmpx);
}
if (sGy2 > 0)
{
int mainsGxGy = sGxGy >> 12;
int secsGxGy = sGxGy & ((1 << 12) - 1);
int tmpData = tmpx * mainsGxGy;
tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1;
tmpy = rightShiftMSB(((sGydI << 3) - tmpData), sGy2);
tmpy = Clip3(-limit, limit, tmpy);
}
srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
gradX0 = m_gradX0 + offsetPos + ((yu*widthG + xu) << 2);
gradX1 = m_gradX1 + offsetPos + ((yu*widthG + xu) << 2);
gradY0 = m_gradY0 + offsetPos + ((yu*widthG + xu) << 2);
gradY1 = m_gradY1 + offsetPos + ((yu*widthG + xu) << 2);
dstY0 = dstY + ((yu*dstStride + xu) << 2);
xAddBIOAvg4(srcY0Temp, src0Stride, srcY1Temp, src1Stride, dstY0, dstStride, gradX0, gradX1, gradY0, gradY1, widthG, (1 << 2), (1 << 2), (int)tmpx, (int)tmpy, shiftNum, offset, clpRng);
} // xu
} // yu
}
bool InterPrediction::xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const BitDepths &clipBitDepths)
{
const int width = pu.lwidth();
const int height = pu.lheight();
const int clipbd = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(clipbd);
const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
const int xUnit = (width >> 2);
const int yUnit = (height >> 2);
m_bioDistThres = (shift <= 5) ? (((32 << (clipbd - 8))*width*height) >> (5 - shift)) : (((32 << (clipbd - 8))*width*height) << (shift - 5));
m_bioSubBlkDistThres = (shift <= 5) ? (((64 << (clipbd - 8)) << 4) >> (5 - shift)) : (((64 << (clipbd - 8)) << 4) << (shift - 5));
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
m_bioDistThres >>= distortionShift;
m_bioSubBlkDistThres >>= distortionShift;
DistParam cDistParam;
Distortion dist = 0;
for (int yu = 0, blkIdx = 0; yu < yUnit; yu++)
{
for (int xu = 0; xu < xUnit; xu++, blkIdx++)
{
const Pel* pPred0 = pYuvSrc0 + ((yu*src0Stride + xu) << 2);
const Pel* pPred1 = pYuvSrc1 + ((yu*src1Stride + xu) << 2);
m_pcRdCost->setDistParam(cDistParam, pPred0, pPred1, src0Stride, src1Stride, clipbd, COMPONENT_Y, (1 << 2), (1 << 2), 0, 1, false, true);
m_bioPredSubBlkDist[blkIdx] = cDistParam.distFunc(cDistParam);
dist += m_bioPredSubBlkDist[blkIdx];
}
}
return (dist >= m_bioDistThres);
}
void InterPrediction::xAddBIOAvg4(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
{
g_pelBufOP.addBIOAvg4(src0, src0Stride, src1, src1Stride, dst, dstStride, gradX0, gradX1, gradY0, gradY1, gradStride, width, height, tmpx, tmpy, shift, offset, clpRng);
}
void InterPrediction::xBioGradFilter(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, int bitDepth)
{
g_pelBufOP.bioGradFilter(pSrc, srcStride, width, height, gradStride, gradX, gradY, bitDepth);
}
void InterPrediction::xCalcBIOPar(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth)
{
g_pelBufOP.calcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, gradStride, widthG, heightG, bitDepth);
}
void InterPrediction::xCalcBlkGradient(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize)
{
g_pelBufOP.calcBlkGradient(sx, sy, arraysGx2, arraysGxGy, arraysGxdI, arraysGy2, arraysGydI, sGx2, sGy2, sGxGy, sGxdI, sGydI, width, height, unitSize);
}
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, PelUnitBuf* yuvDstTmp /*= NULL*/)
#else
void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied )

Karsten Suehring
committed
{
const int iRefIdx0 = pu.refIdx[0];
const int iRefIdx1 = pu.refIdx[1];
if( iRefIdx0 >= 0 && iRefIdx1 >= 0 )
{
#if JVET_O0070_PROF
if (pu.cu->affine && (m_applyPROF[0] || m_applyPROF[1]))
{
xApplyBiPROF(pu, pcYuvSrc0.bufs[COMPONENT_Y], pcYuvSrc1.bufs[COMPONENT_Y], pcYuvDst.bufs[COMPONENT_Y], clpRngs.comp[COMPONENT_Y]);
pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx, true);
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
CHECK(yuvDstTmp, "yuvDstTmp is disallowed with PROF");
#endif
return;
}
#endif
#if JVET_O0681_DIS_BPWA_CIIP
if( pu.cu->GBiIdx != GBI_DEFAULT && (yuvDstTmp || !pu.mhIntraFlag) )
#else
if( pu.cu->GBiIdx != GBI_DEFAULT )
CHECK(bioApplied, "GBi is disallowed with BIO");
pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx);
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (yuvDstTmp)
#if JVET_O0681_DIS_BPWA_CIIP
yuvDstTmp->addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, false);
#else
if (bioApplied)
const int src0Stride = pu.lwidth() + 2 * BIO_EXTEND_SIZE + 2;
const int src1Stride = pu.lwidth() + 2 * BIO_EXTEND_SIZE + 2;
const Pel* pSrcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + 2 * src0Stride + 2;
const Pel* pSrcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + 2 * src1Stride + 2;
#if JVET_O0055_INT_DMVR_DIS_BDOF
bool bioEnabled = true;
#else
bool bioEnabled = xCalcBiPredSubBlkDist(pu, pSrcY0, src0Stride, pSrcY1, src1Stride, clipBitDepths);
#endif
if (bioEnabled)
{
applyBiOptFlow(pu, pcYuvSrc0, pcYuvSrc1, iRefIdx0, iRefIdx1, pcYuvDst, clipBitDepths);
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (yuvDstTmp)
yuvDstTmp->bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]);
#endif
}
else
{
pcYuvDst.bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]);
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (yuvDstTmp)
yuvDstTmp->bufs[0].copyFrom(pcYuvDst.bufs[0]);
#endif
Takeshi Chujoh
committed
if (pu.cs->pps->getWPBiPred())
{
const int iRefIdx0 = pu.refIdx[0];
const int iRefIdx1 = pu.refIdx[1];
WPScalingParam *pwp0;
WPScalingParam *pwp1;
getWpScaling(pu.cu->slice, iRefIdx0, iRefIdx1, pwp0, pwp1);
if (!bioApplied)
{
addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Y);
}
addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cb);
addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cr);
}
else
{
pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, bioApplied);
}
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (yuvDstTmp)
{
if (bioApplied)
{
yuvDstTmp->bufs[1].copyFrom(pcYuvDst.bufs[1]);
yuvDstTmp->bufs[2].copyFrom(pcYuvDst.bufs[2]);
}
else
yuvDstTmp->copyFrom(pcYuvDst);
}
#endif

Karsten Suehring
committed
}
else if( iRefIdx0 >= 0 && iRefIdx1 < 0 )
{
if( pu.cu->triangle )
{
pcYuvDst.copyFrom( pcYuvSrc0 );
}
else

Karsten Suehring
committed
pcYuvDst.copyClip( pcYuvSrc0, clpRngs );
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (yuvDstTmp)
yuvDstTmp->copyFrom(pcYuvDst);
#endif

Karsten Suehring
committed
}
else if( iRefIdx0 < 0 && iRefIdx1 >= 0 )
{
if( pu.cu->triangle )
{
pcYuvDst.copyFrom( pcYuvSrc1 );
}
else

Karsten Suehring
committed
pcYuvDst.copyClip( pcYuvSrc1, clpRngs );
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (yuvDstTmp)
yuvDstTmp->copyFrom(pcYuvDst);
#endif

Karsten Suehring
committed
}
}
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
#if JVET_O0070_PROF
void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYuvSrc0, const CPelBuf& pcYuvSrc1, PelBuf& pcYuvDst, const ClpRng& clpRng)
{
int blockWidth = AFFINE_MIN_BLOCK_SIZE;
int blockHeight = AFFINE_MIN_BLOCK_SIZE;
CHECK(!m_applyPROF[0] && !m_applyPROF[1], "xApplyBiPROF() applies PROF for at least one list.");
const int width = pu.Y().width;
const int height = pu.Y().height;
const int iBit = MAX_CU_DEPTH;
const int shift = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL;
const int bdlimit = std::max<int>(6, clpRng.bd - 6);
const int dmvLimit = 1 << bdlimit;
for (int list = 0; list < 2; list++)
{
if (m_applyPROF[list])
{
Mv mvLT = pu.mvAffi[list][0];
Mv mvRT = pu.mvAffi[list][1];
Mv mvLB = pu.mvAffi[list][2];
int iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY;
iDMvHorX = (mvRT - mvLT).getHor() << (iBit - g_aucLog2[width]);
iDMvHorY = (mvRT - mvLT).getVer() << (iBit - g_aucLog2[width]);
if (pu.cu->affineType == AFFINEMODEL_6PARAM)
{
iDMvVerX = (mvLB - mvLT).getHor() << (iBit - g_aucLog2[height]);
iDMvVerY = (mvLB - mvLT).getVer() << (iBit - g_aucLog2[height]);
}
else
{
iDMvVerX = -iDMvHorY;
iDMvVerY = iDMvHorX;
}
int *dMvScaleHor = dMvBuf[list];
int *dMvScaleVer = dMvBuf[list] + 16;
int* dMvH = dMvScaleHor;
int* dMvV = dMvScaleVer;
int quadHorX = iDMvHorX << 2;
int quadHorY = iDMvHorY << 2;
int quadVerX = iDMvVerX << 2;
int quadVerY = iDMvVerY << 2;
dMvH[0] = ((iDMvHorX + iDMvVerX) << 1) - ((quadHorX + quadVerX) << 1);
dMvV[0] = ((iDMvHorY + iDMvVerY) << 1) - ((quadHorY + quadVerY) << 1);
for (int w = 1; w < blockWidth; w++)
{
dMvH[w] = dMvH[w - 1] + quadHorX;
dMvV[w] = dMvV[w - 1] + quadHorY;
}
dMvH += blockWidth;
dMvV += blockWidth;
for (int h = 1; h < blockHeight; h++)
{
for (int w = 0; w < blockWidth; w++)
{
dMvH[w] = dMvH[w - blockWidth] + quadVerX;
dMvV[w] = dMvV[w - blockWidth] + quadVerY;
}
dMvH += blockWidth;
dMvV += blockWidth;
}
if (!g_pelBufOP.roundIntVector)
{
for (int idx = 0; idx < blockWidth * blockHeight; idx++)
{
roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], shift);
dMvScaleHor[idx] = Clip3(-dmvLimit, dmvLimit - 1, dMvScaleHor[idx]);
dMvScaleVer[idx] = Clip3(-dmvLimit, dmvLimit - 1, dMvScaleVer[idx]);
}
}
else
{
int sz = blockWidth * blockHeight;
g_pelBufOP.roundIntVector(dMvScaleHor, sz, shift, dmvLimit);
g_pelBufOP.roundIntVector(dMvScaleVer, sz, shift, dmvLimit);
}
}
}
const int PROF_BORDER_EXT_W = BIO_EXTEND_SIZE;
const int PROF_BORDER_EXT_H = BIO_EXTEND_SIZE;
const int cuExtW = width + PROF_BORDER_EXT_W * 2;
const int cuExtH = height + PROF_BORDER_EXT_H * 2;
PelBuf gradXExt0 = PelBuf(m_gradBuf[REF_PIC_LIST_0][0], cuExtW, cuExtH);
PelBuf gradYExt0 = PelBuf(m_gradBuf[REF_PIC_LIST_0][1], cuExtW, cuExtH);
PelBuf gradXExt1 = PelBuf(m_gradBuf[REF_PIC_LIST_1][0], cuExtW, cuExtH);
PelBuf gradYExt1 = PelBuf(m_gradBuf[REF_PIC_LIST_1][1], cuExtW, cuExtH);
Pel* gX0 = gradXExt0.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
Pel* gY0 = gradYExt0.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
Pel* gX1 = gradXExt1.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
Pel* gY1 = gradYExt1.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
int *dMvX0 = dMvBuf[REF_PIC_LIST_0];
int *dMvY0 = dMvBuf[REF_PIC_LIST_0] + 16;
int *dMvX1 = dMvBuf[REF_PIC_LIST_1];
int *dMvY1 = dMvBuf[REF_PIC_LIST_1] + 16;
const Pel* srcY0 = pcYuvSrc0.bufAt(0, 0);
const Pel* srcY1 = pcYuvSrc1.bufAt(0, 0);
Pel* dstY = pcYuvDst.bufAt(0, 0);
if(m_applyPROF[0] && m_applyPROF[1])
g_pelBufOP.applyBiPROF[1](dstY, pcYuvDst.stride, srcY0, srcY1, pcYuvSrc0.stride, width, height, gX0, gY0, gX1, gY1, gradXExt0.stride, dMvX0, dMvY0, dMvX1, dMvY1, blockWidth, getGbiWeight(pu.cu->GBiIdx, REF_PIC_LIST_0), clpRng);
else if (m_applyPROF[0])
g_pelBufOP.applyBiPROF[0](dstY, pcYuvDst.stride, srcY0, srcY1, pcYuvSrc0.stride, width, height, gX0, gY0, gX1, gY1, gradXExt0.stride, dMvX0, dMvY0, dMvX1, dMvY1, blockWidth, getGbiWeight(pu.cu->GBiIdx, REF_PIC_LIST_0), clpRng);
else
g_pelBufOP.applyBiPROF[0](dstY, pcYuvDst.stride, srcY1, srcY0, pcYuvSrc0.stride, width, height, gX1, gY1, gX0, gY0, gradXExt0.stride, dMvX1, dMvY1, dMvX0, dMvY0, blockWidth, getGbiWeight(pu.cu->GBiIdx, REF_PIC_LIST_1), clpRng);
}
#endif
void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
, PelUnitBuf* predBufWOBIO /*= NULL*/
#endif

Karsten Suehring
committed
{
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
CHECK(predBufWOBIO && pu.mhIntraFlag, "the case should not happen!");
#endif
if ((!luma || !chroma) && eRefPicList == REF_PIC_LIST_0)
#if !JVET_O0258_REMOVE_CHROMA_IBC_FOR_DUALTREE
if (!luma && chroma)
{
xChromaMC(pu, predBuf);
return;
}
else // (luma && !chroma)
{
xPredInterUni(pu, eRefPicList, predBuf, false
, false
, luma, chroma);
return;
#if !JVET_O0258_REMOVE_CHROMA_IBC_FOR_DUALTREE

Karsten Suehring
committed
CodingStructure &cs = *pu.cs;
const PPS &pps = *cs.pps;
const SliceType sliceType = cs.slice->getSliceType();
if( eRefPicList != REF_PIC_LIST_X )
{
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
CHECK(predBufWOBIO != NULL, "the case should not happen!");
#endif

Karsten Suehring
committed
if( ( ( sliceType == P_SLICE && pps.getUseWP() ) || ( sliceType == B_SLICE && pps.getWPBiPred() ) ) )
{
xPredInterUni ( pu, eRefPicList, predBuf, true
, false
, true, true
);

Karsten Suehring
committed
xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, -1, m_maxCompIDToPred );
}
else
{
xPredInterUni( pu, eRefPicList, predBuf, false
, false
, true, true
);

Karsten Suehring
committed
}
}
else
{
CHECK( !pu.cu->affine && pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0 && ( pu.lwidth() + pu.lheight() == 12 ), "invalid 4x8/8x4 bi-predicted blocks" );
WPScalingParam *wp0;
WPScalingParam *wp1;
int refIdx0 = pu.refIdx[REF_PIC_LIST_0];
int refIdx1 = pu.refIdx[REF_PIC_LIST_1];
pu.cs->slice->getWpScaling(REF_PIC_LIST_0, refIdx0, wp0);
pu.cs->slice->getWpScaling(REF_PIC_LIST_1, refIdx1, wp1);
bool bioApplied = false;
const Slice &slice = *pu.cs->slice;
#if JVET_O1140_SLICE_DISABLE_BDOF_DMVR_FLAG
if (pu.cs->sps->getBDOFEnabledFlag() && (!pu.cs->slice->getDisBdofDmvrFlag()))
#else
if (pu.cs->sps->getBDOFEnabledFlag())
{
if (pu.cu->affine || m_subPuMC)
{
bioApplied = false;
}
else
{
const bool biocheck0 = !((wp0[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Y].bPresentFlag) && slice.getSliceType() == B_SLICE);
const bool biocheck1 = !(pps.getUseWP() && slice.getSliceType() == P_SLICE);
if (biocheck0
&& biocheck1
&& PU::isBiPredFromDifferentDir(pu)
)
{
bioApplied = true;
}
}
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (bioApplied && pu.mhIntraFlag)
{
bioApplied = false;
}
#endif
if (pu.cu->cs->sps->getUseGBi() && bioApplied && pu.cu->GBiIdx != GBI_DEFAULT)
if (pu.mmvdEncOptMode == 2 && pu.mmvdMergeFlag)
bioApplied = false;
}
}
bool dmvrApplied = false;
dmvrApplied = (pu.mvRefine) && PU::checkDMVRCondition(pu);
if ((pu.lumaSize().width > MAX_BDOF_APPLICATION_REGION || pu.lumaSize().height > MAX_BDOF_APPLICATION_REGION) && pu.mergeType != MRG_TYPE_SUBPU_ATMVP && (bioApplied && !dmvrApplied))
{
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
xSubPuBio(pu, predBuf, eRefPicList, predBufWOBIO);
#else
if (pu.mergeType != MRG_TYPE_DEFAULT_N && pu.mergeType != MRG_TYPE_IBC)

Karsten Suehring
committed
{
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
CHECK(predBufWOBIO != NULL, "the case should not happen!");
#endif

Karsten Suehring
committed
xSubPuMC( pu, predBuf, eRefPicList );
}
else if( xCheckIdenticalMotion( pu ) )
{
xPredInterUni( pu, REF_PIC_LIST_0, predBuf, false
, false
, true, true
);
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
if (predBufWOBIO)
predBufWOBIO->copyFrom(predBuf);
#endif

Karsten Suehring
committed
}
else
{
#if JVET_O0108_DIS_DMVR_BDOF_CIIP
xPredInterBi(pu, predBuf, predBufWOBIO);
#else

Karsten Suehring
committed
xPredInterBi( pu, predBuf );

Karsten Suehring
committed
}
}
return;
}
void InterPrediction::motionCompensation( CodingUnit &cu, const RefPicList &eRefPicList

Karsten Suehring
committed
{
for( auto &pu : CU::traversePUs( cu ) )
{
PelUnitBuf predBuf = cu.cs->getPredBuf( pu );
pu.mvRefine = true;
motionCompensation( pu, predBuf, eRefPicList
pu.mvRefine = false;

Karsten Suehring
committed
}
}
void InterPrediction::motionCompensation( PredictionUnit &pu, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/

Karsten Suehring
committed
{
PelUnitBuf predBuf = pu.cs->getPredBuf( pu );
motionCompensation( pu, predBuf, eRefPicList

Karsten Suehring
committed
}
int InterPrediction::rightShiftMSB(int numer, int denom)
{
int d;
int msbIdx = 0;
for (msbIdx = 0; msbIdx<32; msbIdx++)
{
if (denom < ((int)1 << msbIdx))
{
break;
}
}

Karsten Suehring
committed
int shiftIdx = msbIdx - 1;
d = (numer >> shiftIdx);

Karsten Suehring
committed

Karsten Suehring
committed
void InterPrediction::motionCompensation4Triangle( CodingUnit &cu, MergeCtx &triangleMrgCtx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1 )
{
for( auto &pu : CU::traversePUs( cu ) )
{
const UnitArea localUnitArea( cu.cs->area.chromaFormat, Area( 0, 0, pu.lwidth(), pu.lheight() ) );
PelUnitBuf tmpTriangleBuf = m_triangleBuf.getBuf( localUnitArea );
PelUnitBuf predBuf = cu.cs->getPredBuf( pu );
PU::spanMotionInfo( pu );
motionCompensation( pu, tmpTriangleBuf );
{
if( g_mctsDecCheckEnabled && !MCTSHelper::checkMvBufferForMCTSConstraint( pu, true ) )
{
printf( "DECODER_TRIANGLE_PU: pu motion vector across tile boundaries (%d,%d,%d,%d)\n", pu.lx(), pu.ly(), pu.lwidth(), pu.lheight() );
}
}
PU::spanMotionInfo( pu );
motionCompensation( pu, predBuf );
{
if( g_mctsDecCheckEnabled && !MCTSHelper::checkMvBufferForMCTSConstraint( pu, true ) )
{
printf( "DECODER_TRIANGLE_PU: pu motion vector across tile boundaries (%d,%d,%d,%d)\n", pu.lx(), pu.ly(), pu.lwidth(), pu.lheight() );
}
}
weightedTriangleBlk( pu, splitDir, MAX_NUM_CHANNEL_TYPE, predBuf, tmpTriangleBuf, predBuf );
void InterPrediction::weightedTriangleBlk( PredictionUnit &pu, const bool splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
if( channel == CHANNEL_TYPE_LUMA )
{
Yuling Hsiao
committed
#if JVET_O0280_SIMD_TRIANGLE_WEIGHTING
m_if.weightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
#else
xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
Yuling Hsiao
committed
#endif
}
else if( channel == CHANNEL_TYPE_CHROMA )
{
Yuling Hsiao
committed
#if JVET_O0280_SIMD_TRIANGLE_WEIGHTING
m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
#else
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
Yuling Hsiao
committed
#endif
Yuling Hsiao
committed
#if JVET_O0280_SIMD_TRIANGLE_WEIGHTING
m_if.weightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
m_if.weightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
#else
xWeightedTriangleBlk( pu, pu.lumaSize().width, pu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1 );
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1 );
xWeightedTriangleBlk( pu, pu.chromaSize().width, pu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1 );
Yuling Hsiao
committed
#endif
Yuling Hsiao
committed
#if !JVET_O0280_SIMD_TRIANGLE_WEIGHTING
void InterPrediction::xWeightedTriangleBlk( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 )
{
Pel* dst = predDst .get(compIdx).buf;
Pel* src0 = predSrc0.get(compIdx).buf;
Pel* src1 = predSrc1.get(compIdx).buf;
int32_t strideDst = predDst .get(compIdx).stride - width;
int32_t strideSrc0 = predSrc0.get(compIdx).stride - width;
int32_t strideSrc1 = predSrc1.get(compIdx).stride - width;
const char log2WeightBase = 3;
const ClpRng clipRng = pu.cu->slice->clpRngs().comp[compIdx];
const int32_t clipbd = clipRng.bd;
const int32_t shiftDefault = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
const int32_t offsetDefault = (1<<(shiftDefault-1)) + IF_INTERNAL_OFFS;
const int32_t shiftWeighted = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase;
const int32_t offsetWeighted = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase);
const int32_t ratioWH = (width > height) ? (width / height) : 1;
const int32_t ratioHW = (width > height) ? 1 : (height / width);
const bool longWeight = (compIdx == COMPONENT_Y);
const int32_t weightedLength = longWeight ? 7 : 3;
int32_t weightedStartPos = ( splitDir == 0 ) ? ( 0 - (weightedLength >> 1) * ratioWH ) : ( width - ((weightedLength + 1) >> 1) * ratioWH );
int32_t weightedEndPos = weightedStartPos + weightedLength * ratioWH - 1;
int32_t weightedPosoffset =( splitDir == 0 ) ? ratioWH : -ratioWH;
Pel tmpPelWeighted;
int32_t weightIdx;
int32_t x, y, tmpX, tmpY, tmpWeightedStart, tmpWeightedEnd;
*dst++ = ClipPel( rightShift( (splitDir == 0 ? *src1 : *src0) + offsetDefault, shiftDefault), clipRng );
src0++;
src1++;
tmpWeightedStart = std::max((int32_t)0, weightedStartPos);
tmpWeightedEnd = std::min(weightedEndPos, (int32_t)(width - 1));
weightIdx += abs(weightedStartPos) / ratioWH;
for( x = tmpWeightedStart; x <= tmpWeightedEnd; x+= ratioWH )
tmpPelWeighted = Clip3( 1, 7, longWeight ? weightIdx : (weightIdx * 2));
tmpPelWeighted = splitDir ? ( 8 - tmpPelWeighted ) : tmpPelWeighted;
*dst++ = ClipPel( rightShift( (tmpPelWeighted*(*src0++) + ((8 - tmpPelWeighted) * (*src1++)) + offsetWeighted), shiftWeighted ), clipRng );
*dst++ = ClipPel( rightShift( (splitDir == 0 ? *src0 : *src1) + offsetDefault, shiftDefault ), clipRng );
src0++;
src1++;
dst += strideDst;
src0 += strideSrc0;
src1 += strideSrc1;
weightedStartPos += weightedPosoffset;
weightedEndPos += weightedPosoffset;
Yuling Hsiao
committed
#endif
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
#if JVET_O0297_DMVR_PADDING // For Dec speedup
void InterPrediction::xPrefetch(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId, bool forLuma)
{
int offset, width, height;
Mv cMv;
const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
int mvShift = (MV_FRACTIONAL_BITS_INTERNAL);
int start = 0;
int end = MAX_NUM_COMPONENT;
start = forLuma ? 0 : 1;
end = forLuma ? 1 : MAX_NUM_COMPONENT;
for (int compID = start; compID < end; compID++)
{
cMv = Mv(pu.mv[refId].getHor(), pu.mv[refId].getVer());
pcPad.bufs[compID].stride = (pcPad.bufs[compID].width + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA);
int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA;
width = pcPad.bufs[compID].width;
height = pcPad.bufs[compID].height;