Newer
Older
/*L0 MC for refinement*/
{
int offset;
int leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y].stride + 1);
offset += (-(int)DMVR_NUM_ITERATION)* (int)m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y].stride;
offset += (-(int)DMVR_NUM_ITERATION);
PelBuf srcBuf = m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y];
PelUnitBuf yuvPredTempL0 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL0,
m_biLinearBufStride
, pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION)));
xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( REF_PIC_LIST_0, refIdx0 )->unscaledPic, mergeMVL0, yuvPredTempL0, true, clpRngs.comp[COMPONENT_Y],
false, false, pu.cu->slice->getScalingRatio( REF_PIC_LIST_0, refIdx0 ), pu.lwidth() + ( 2 * DMVR_NUM_ITERATION ), pu.lheight() + ( 2 * DMVR_NUM_ITERATION ), true, ( (Pel *)srcBuf.buf ) + offset, srcBuf.stride );
}
/*L1 MC for refinement*/
{
int offset;
int leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y].stride + 1);
offset += (-(int)DMVR_NUM_ITERATION)* (int)m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y].stride;
offset += (-(int)DMVR_NUM_ITERATION);
PelBuf srcBuf = m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y];
PelUnitBuf yuvPredTempL1 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL1,
m_biLinearBufStride
, pu.lwidth() + (2 * DMVR_NUM_ITERATION), pu.lheight() + (2 * DMVR_NUM_ITERATION)));
xPredInterBlk( COMPONENT_Y, pu, pu.cu->slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->unscaledPic, mergeMVL1, yuvPredTempL1, true, clpRngs.comp[COMPONENT_Y],
false, false, pu.cu->slice->getScalingRatio( REF_PIC_LIST_1, refIdx1 ), pu.lwidth() + ( 2 * DMVR_NUM_ITERATION ), pu.lheight() + ( 2 * DMVR_NUM_ITERATION ), true, ( (Pel *)srcBuf.buf ) + offset, srcBuf.stride );
void InterPrediction::xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, const ClpRngs &clpRngs, const bool bioApplied)
/*Always High Precision*/
int mvShift = MV_FRACTIONAL_BITS_INTERNAL;
/*use merge MV as starting MV*/
Mv mergeMv[] = { pu.mv[REF_PIC_LIST_0] , pu.mv[REF_PIC_LIST_1] };
m_biLinearBufStride = (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION));
int dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
int dx = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH);
Position puPos = pu.lumaPos();
int bd = pu.cs->slice->getClpRngs().comp[COMPONENT_Y].bd;
{
int num = 0;
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
int scaleX = getComponentScaleX(COMPONENT_Cb, pu.chromaFormat);
int scaleY = getComponentScaleY(COMPONENT_Cb, pu.chromaFormat);
m_biLinearBufStride = (dx + (2 * DMVR_NUM_ITERATION));
// point mc buffer to cetre point to avoid multiplication to reach each iteration to the begining
Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + (DMVR_NUM_ITERATION * m_biLinearBufStride) + DMVR_NUM_ITERATION;
PredictionUnit subPu = pu;
subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(puPos.x, puPos.y, dx, dy)));
m_cYuvRefBuffDMVRL0 = (pu.chromaFormat == CHROMA_400 ?
PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y())) :
PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y()),
PelBuf(m_cRefSamplesDMVRL0[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL0[2], pcYuvDst.Cr())));
m_cYuvRefBuffDMVRL0 = m_cYuvRefBuffDMVRL0.subBuf(UnitAreaRelative(pu, subPu));
m_cYuvRefBuffDMVRL1 = (pu.chromaFormat == CHROMA_400 ?
PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y())) :
PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y()), PelBuf(m_cRefSamplesDMVRL1[1], pcYuvDst.Cb()),
PelBuf(m_cRefSamplesDMVRL1[2], pcYuvDst.Cr())));
m_cYuvRefBuffDMVRL1 = m_cYuvRefBuffDMVRL1.subBuf(UnitAreaRelative(pu, subPu));
PelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ?
PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y())) :
PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[0][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvDst.Cr())));
PelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ?
PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y())) :
PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[1][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvDst.Cr())));
srcPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu));
srcPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu));
int yStart = 0;
for (int y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
{
for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
PredictionUnit subPu = pu;
subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy)));
xPrefetch(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0, 1);
xPrefetch(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1, 1);
xinitMC(subPu, clpRngs);
uint64_t minCost = MAX_UINT64;
bool notZeroCost = true;
int16_t totalDeltaMV[2] = { 0,0 };
int16_t deltaMV[2] = { 0, 0 };
uint64_t *pSADsArray;
for (int i = 0; i < (((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)); i++)
{
m_SADsArray[i] = MAX_UINT64;
}
pSADsArray = &m_SADsArray[(((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)) >> 1];
for (int i = 0; i < iterationCount; i++)
{
deltaMV[0] = 0;
deltaMV[1] = 0;
Pel *addrL0 = biLinearPredL0 + totalDeltaMV[0] + (totalDeltaMV[1] * m_biLinearBufStride);
Pel *addrL1 = biLinearPredL1 - totalDeltaMV[0] - (totalDeltaMV[1] * m_biLinearBufStride);
if (i == 0)
{
minCost = xDMVRCost(clpRngs.comp[COMPONENT_Y].bd, addrL0, m_biLinearBufStride, addrL1, m_biLinearBufStride, dx, dy);
{
notZeroCost = false;
break;
}
pSADsArray[0] = minCost;
}
if (!minCost)
{
notZeroCost = false;
break;
}
xBIPMVRefine(bd, addrL0, addrL1, minCost, deltaMV, pSADsArray, dx, dy);
if (deltaMV[0] == 0 && deltaMV[1] == 0)
{
break;
}
totalDeltaMV[0] += deltaMV[0];
totalDeltaMV[1] += deltaMV[1];
pSADsArray += ((deltaMV[1] * (((2 * DMVR_NUM_ITERATION) + 1))) + deltaMV[0]);
bioAppliedType[num] = (minCost < bioEnabledThres) ? false : bioApplied;
totalDeltaMV[0] = (totalDeltaMV[0] << mvShift);
totalDeltaMV[1] = (totalDeltaMV[1] << mvShift);
xDMVRSubPixelErrorSurface(notZeroCost, totalDeltaMV, deltaMV, pSADsArray);
pu.mvdL0SubPu[num] = Mv(totalDeltaMV[0], totalDeltaMV[1]);
PelUnitBuf subPredBuf = pcYuvDst.subBuf(UnitAreaRelative(pu, subPu));
bool blockMoved = false;
if (pu.mvdL0SubPu[num] != Mv(0, 0))
{
blockMoved = true;
xPrefetch(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0, 0);
xPrefetch(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1, 0);
xPad(subPu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0);
xPad(subPu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1);
}
int dstStride[MAX_NUM_COMPONENT] = { pcYuvDst.bufs[COMPONENT_Y].stride, pcYuvDst.bufs[COMPONENT_Cb].stride, pcYuvDst.bufs[COMPONENT_Cr].stride };
subPu.mv[0] = mergeMv[REF_PIC_LIST_0] + pu.mvdL0SubPu[num];
subPu.mv[1] = mergeMv[REF_PIC_LIST_1] - pu.mvdL0SubPu[num];
subPu.mv[0].clipToStorageBitDepth();
subPu.mv[1].clipToStorageBitDepth();
xFinalPaddedMCForDMVR(subPu, srcPred0, srcPred1, m_cYuvRefBuffDMVRL0, m_cYuvRefBuffDMVRL1, bioAppliedType[num], mergeMv
, blockMoved
);
subPredBuf.bufs[COMPONENT_Y].buf = pcYuvDst.bufs[COMPONENT_Y].buf + xStart + yStart * dstStride[COMPONENT_Y];
subPredBuf.bufs[COMPONENT_Cb].buf = pcYuvDst.bufs[COMPONENT_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cb]);
subPredBuf.bufs[COMPONENT_Cr].buf = pcYuvDst.bufs[COMPONENT_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMPONENT_Cr]);
xWeightedAverage(subPu, srcPred0, srcPred1, subPredBuf, subPu.cu->slice->getSPS()->getBitDepths(), subPu.cu->slice->clpRngs(), bioAppliedType[num]);
num++;
}
}
}
JVET_J0090_SET_CACHE_ENABLE(true);

Karsten Suehring
committed
#if JVET_J0090_MEMORY_BANDWITH_MEASURE
void InterPrediction::cacheAssign( CacheModel *cache )
{
m_cacheModel = cache;
m_if.cacheAssign( cache );
m_if.initInterpolationFilter( !cache->isCacheEnable() );
}
#endif
void InterPrediction::xFillIBCBuffer(CodingUnit &cu)
{
for (auto &currPU : CU::traverseTUs(cu))
{
for (const CompArea &area : currPU.blocks)
{
const unsigned int lcuWidth = cu.cs->slice->getSPS()->getMaxCUWidth();
const int shiftSample = ::getComponentScaleX(area.compID, cu.chromaFormat);
const int ctuSizeLog2 = floorLog2(lcuWidth) - shiftSample;
const int pux = area.x & ((m_IBCBufferWidth >> shiftSample) - 1);
const int puy = area.y & (( 1 << ctuSizeLog2 ) - 1);
const CompArea dstArea = CompArea(area.compID, cu.chromaFormat, Position(pux, puy), Size(area.width, area.height));
CPelBuf srcBuf = cu.cs->getRecoBuf(area);
PelBuf dstBuf = m_IBCBuffer.getBuf(dstArea);
dstBuf.copyFrom(srcBuf);
}
}
}
void InterPrediction::xIntraBlockCopy(PredictionUnit &pu, PelUnitBuf &predBuf, const ComponentID compID)
{
const unsigned int lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth();
int shiftSample = ::getComponentScaleX(compID, pu.chromaFormat);
const int ctuSizeLog2 = floorLog2(lcuWidth) - shiftSample;
pu.bv = pu.mv[REF_PIC_LIST_0];
pu.bv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
int refx, refy;
if (compID == COMPONENT_Y)
{
refx = pu.Y().x + pu.bv.hor;
refy = pu.Y().y + pu.bv.ver;
}
else
{//Cb or Cr
refx = pu.Cb().x + (pu.bv.hor >> shiftSample);
refy = pu.Cb().y + (pu.bv.ver >> shiftSample);
}
refx &= ((m_IBCBufferWidth >> shiftSample) - 1);
refy &= ((1 << ctuSizeLog2) - 1);
if (refx + predBuf.bufs[compID].width <= (m_IBCBufferWidth >> shiftSample))
const CompArea srcArea = CompArea(compID, pu.chromaFormat, Position(refx, refy), Size(predBuf.bufs[compID].width, predBuf.bufs[compID].height));
const CPelBuf refBuf = m_IBCBuffer.getBuf(srcArea);
predBuf.bufs[compID].copyFrom(refBuf);
{//wrap around
int width = (m_IBCBufferWidth >> shiftSample) - refx;
CompArea srcArea = CompArea(compID, pu.chromaFormat, Position(refx, refy), Size(width, predBuf.bufs[compID].height));
CPelBuf srcBuf = m_IBCBuffer.getBuf(srcArea);
PelBuf dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position(0, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height));
dstBuf.copyFrom(srcBuf);
width = refx + predBuf.bufs[compID].width - (m_IBCBufferWidth >> shiftSample);
srcArea = CompArea(compID, pu.chromaFormat, Position(0, refy), Size(width, predBuf.bufs[compID].height));
srcBuf = m_IBCBuffer.getBuf(srcArea);
dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position((m_IBCBufferWidth >> shiftSample) - refx, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height));
dstBuf.copyFrom(srcBuf);
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
}
}
void InterPrediction::resetIBCBuffer(const ChromaFormat chromaFormatIDC, const int ctuSize)
{
const UnitArea area = UnitArea(chromaFormatIDC, Area(0, 0, m_IBCBufferWidth, ctuSize));
m_IBCBuffer.getBuf(area).fill(-1);
}
void InterPrediction::resetVPDUforIBC(const ChromaFormat chromaFormatIDC, const int ctuSize, const int vSize, const int xPos, const int yPos)
{
const UnitArea area = UnitArea(chromaFormatIDC, Area(xPos & (m_IBCBufferWidth - 1), yPos & (ctuSize - 1), vSize, vSize));
m_IBCBuffer.getBuf(area).fill(-1);
}
bool InterPrediction::isLumaBvValid(const int ctuSize, const int xCb, const int yCb, const int width, const int height, const int xBv, const int yBv)
{
if(((yCb + yBv) & (ctuSize - 1)) + height > ctuSize)
{
return false;
}
int refTLx = xCb + xBv;
int refTLy = (yCb + yBv) & (ctuSize - 1);
PelBuf buf = m_IBCBuffer.Y();
for(int x = 0; x < width; x += 4)
{
for(int y = 0; y < height; y += 4)
{
if(buf.at((x + refTLx) & (m_IBCBufferWidth - 1), y + refTLy) == -1) return false;
if(buf.at((x + 3 + refTLx) & (m_IBCBufferWidth - 1), y + refTLy) == -1) return false;
if(buf.at((x + refTLx) & (m_IBCBufferWidth - 1), y + 3 + refTLy) == -1) return false;
if(buf.at((x + 3 + refTLx) & (m_IBCBufferWidth - 1), y + 3 + refTLy) == -1) return false;
}
}
return true;
}
bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, const PPS& pps, const CompArea &blk, const Picture* refPic, const Mv& mv, Pel* dst, const int dstStride, const bool bi, const bool wrapRef, const ClpRng& clpRng, const int filterIndex, const bool useAltHpelIf )
const ChromaFormat chFmt = blk.chromaFormat;
const ComponentID compID = blk.compID;
const bool rndRes = !bi;
int shiftHor = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleX( compID, chFmt );
int shiftVer = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleY( compID, chFmt );
int width = blk.width;
int height = blk.height;
CPelBuf refBuf;
const bool scaled = scalingRatio != SCALE_1X;
if( scaled )
{
int row, col;
int refPicWidth = refPic->cs->pps->getPicWidthInLumaSamples();
int refPicHeight = refPic->cs->pps->getPicHeightInLumaSamples();
const int posShift = SCALE_RATIO_BITS - 4;
int stepX = ( scalingRatio.first + 8 ) >> 4;
int stepY = ( scalingRatio.second + 8 ) >> 4;
int64_t x0Int;
int64_t y0Int;
int offX = 1 << ( posShift - shiftHor - 1 );
int offY = 1 << ( posShift - shiftVer - 1 );
x0Int = ( ( blk.pos().x << ( 4 + ::getComponentScaleX( compID, chFmt ) ) ) + mv.getHor() )* scalingRatio.first;
x0Int = SIGN( x0Int ) * ( ( llabs( x0Int ) + ( (long long)1 << ( 7 + ::getComponentScaleX( compID, chFmt ) ) ) ) >> ( 8 + ::getComponentScaleX( compID, chFmt ) ) );
y0Int = ( ( blk.pos().y << ( 4 + ::getComponentScaleY( compID, chFmt ) ) ) + mv.getVer() )* scalingRatio.second;
y0Int = SIGN( y0Int ) * ( ( llabs( y0Int ) + ( (long long)1 << ( 7 + ::getComponentScaleY( compID, chFmt ) ) ) ) >> ( 8 + ::getComponentScaleY( compID, chFmt ) ) );
const int extSize = isLuma( compID ) ? 1 : 2;
int vFilterSize = isLuma( compID ) ? NTAPS_LUMA : NTAPS_CHROMA;
int refHeight = height * scalingRatio.second >> SCALE_RATIO_BITS;
refHeight = std::max<int>( 1, refHeight );
CHECK( MAX_CU_SIZE * MAX_SCALING_RATIO < refHeight + vFilterSize - 1 + extSize, "Buffer size is not enough, increase MAX_SCALING_RATIO" );
Pel buffer[( MAX_CU_SIZE + 16 ) * ( MAX_CU_SIZE * MAX_SCALING_RATIO + 16 )];
int tmpStride = width;
int yInt0 = ( (int32_t)y0Int + offY ) >> posShift;
yInt0 = std::min( std::max( 0, yInt0 ), ( refPicHeight >> ::getComponentScaleY( compID, chFmt ) ) );
int xInt0 = ( (int32_t)x0Int + offX ) >> posShift;
xInt0 = std::min( std::max( 0, xInt0 ), ( refPicWidth >> ::getComponentScaleX( compID, chFmt ) ) );
int xInt = 0, yInt = 0;
for( col = 0; col < width; col++ )
{
int posX = (int32_t)x0Int + col * stepX;
xInt = ( posX + offX ) >> posShift;
xInt = std::min( std::max( 0, xInt ), ( refPicWidth >> ::getComponentScaleX( compID, chFmt ) ) );
int xFrac = ( ( posX + offX ) >> ( posShift - shiftHor ) ) & ( ( 1 << shiftHor ) - 1 );
CHECK( xInt0 > xInt, "Wrong horizontal starting point" );
Position offset = Position( xInt, yInt0 );
refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, Size( 1, refHeight ) ), wrapRef );
Pel* tempBuf = buffer + col;
m_if.filterHor( compID, (Pel*)refBuf.buf - ( ( vFilterSize >> 1 ) - 1 ) * refBuf.stride, refBuf.stride, tempBuf, tmpStride, 1, refHeight + vFilterSize - 1 + extSize, xFrac, false, chFmt, clpRng, filterIndex, false, useAltHpelIf );
}
for( row = 0; row < height; row++ )
{
int posY = (int32_t)y0Int + row * stepY;
yInt = ( posY + offY ) >> posShift;
yInt = std::min( std::max( 0, yInt ), ( refPicHeight >> ::getComponentScaleY( compID, chFmt ) ) );
int yFrac = ( ( posY + offY ) >> ( posShift - shiftVer ) ) & ( ( 1 << shiftVer ) - 1 );
CHECK( yInt0 > yInt, "Wrong vertical starting point" );
Pel* tempBuf = buffer + ( yInt - yInt0 ) * tmpStride;
JVET_J0090_SET_CACHE_ENABLE( false );
m_if.filterVer( compID, tempBuf + ( ( vFilterSize >> 1 ) - 1 ) * tmpStride, tmpStride, dst + row * dstStride, dstStride, width, 1, yFrac, false, rndRes, chFmt, clpRng, filterIndex, false, useAltHpelIf );
JVET_J0090_SET_CACHE_ENABLE( true );
}
Position offset = Position( xInt, yInt );
refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, Size( 1, 1 ) ), wrapRef );
}
return scaled;
}