Commit 0d9ccf21, authored and committed by Jeeva Raj A

JVET M0147 - DMVR (Decoder-side Motion Vector Refinement)

 - CE9.2.1g + RefinedMV only for MC and TMVP + SAD cost
parent e9a4a4ff
......@@ -150,6 +150,7 @@ IBC : 0 # turned off in CTC
AllowDisFracMMVD : 1
AffineAmvr : 1
LumaReshapeEnable : 1 # luma reshaping. 0: disable 1:enable
DMVR : 1
# Fast tools
PBIntraFast : 1
......
......@@ -268,6 +268,9 @@ void EncApp::xInitLibCfg()
#endif
#if JVET_M0247_AFFINE_AMVR_ENCOPT
m_cEncLib.setUseAffineAmvrEncOpt ( m_AffineAmvrEncOpt );
#endif
#if JVET_M0147_DMVR
m_cEncLib.setDMVR ( m_DMVR );
#endif
m_cEncLib.setIBCMode ( m_IBCMode );
m_cEncLib.setIBCLocalSearchRangeX ( m_IBCLocalSearchRangeX );
......
......@@ -875,6 +875,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
#endif
#if JVET_M0247_AFFINE_AMVR_ENCOPT
("AffineAmvrEncOpt", m_AffineAmvrEncOpt, false, "Enable encoder optimization of affine AMVR")
#endif
#if JVET_M0147_DMVR
("DMVR", m_DMVR,
#endif
( "IBC", m_IBCMode, 0u, "IBCMode (0x1:enabled, 0x0:disabled) [default: disabled]")
( "IBCLocalSearchRangeX", m_IBCLocalSearchRangeX, 128u, "Search range of IBC local search in x direction")
......@@ -1971,6 +1974,9 @@ bool EncAppCfg::xCheckParameter()
xConfirmPara( m_GBi, "GBi is only allowed with NEXT profile" );
xConfirmPara( m_GBiFast, "GBiFast is only allowed with NEXT profile" );
xConfirmPara( m_Triangle, "Triangle is only allowed with NEXT profile" );
#if JVET_M0147_DMVR
xConfirmPara(m_DMVR, "DMVR only allowed with NEXT profile");
#endif
// ADD_NEW_TOOL : (parameter check) add a check for next tools here
}
else
......@@ -3202,6 +3208,9 @@ void EncAppCfg::xPrintParameter()
#if JVET_M0247_AFFINE_AMVR_ENCOPT
m_AffineAmvrEncOpt = m_AffineAmvr ? m_AffineAmvrEncOpt : false;
msg( VERBOSE, "AffineAmvrEncOpt:%d ", m_AffineAmvrEncOpt );
#endif
#if JVET_M0147_DMVR
msg(VERBOSE, "DMVR:%d ", m_DMVR);
#endif
}
msg(VERBOSE, "IBC:%d ", m_IBCMode);
......
......@@ -249,6 +249,9 @@ protected:
#if JVET_M0247_AFFINE_AMVR_ENCOPT
bool m_AffineAmvrEncOpt;
#endif
#if JVET_M0147_DMVR
bool m_DMVR;
#endif
unsigned m_IBCMode;
unsigned m_IBCLocalSearchRangeX;
......
......@@ -299,6 +299,10 @@ PelBufferOps::PelBufferOps()
calcBIOPar = calcBIOParCore;
calcBlkGradient = calcBlkGradientCore;
#if JVET_M0147_DMVR
copyBuffer = copyBufferCore;
padding = paddingCore;
#endif
#if ENABLE_SIMD_OPT_GBI
removeWeightHighFreq8 = removeWeightHighFreq;
removeWeightHighFreq4 = removeWeightHighFreq;
......@@ -313,6 +317,42 @@ PelBufferOps g_pelBufOP = PelBufferOps();
#endif
#endif
#if JVET_M0147_DMVR
// Plain C++ block copy: transfers `height` rows of `width` samples from `src`
// to `dst`. Both strides are expressed in samples (Pel units), not bytes, and
// each row is moved with a single memcpy, so the rows of src and dst must not
// overlap each other.
void copyBufferCore(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height)
{
  // The payload of every row has the same size, so it is computed only once.
  const int rowBytes = width * sizeof(Pel);

  for (int row = 0; row < height; ++row)
  {
    const Pel *srcRow = src + row * srcStride;
    Pel       *dstRow = dst + row * dstStride;
    memcpy(dstRow, srcRow, rowBytes);
  }
}
// Pads an iWidth x iHeight block in place by replicating its border samples
// `padSize` samples outwards on all four sides.
//   ptr     - top-left sample of the unpadded block
//   iStride - distance between consecutive rows, in samples
// The caller must provide at least padSize writable samples/rows around the
// block; nothing is allocated here.
void paddingCore(Pel *ptr, int iStride, int iWidth, int iHeight, int padSize)
{
  // Horizontal pass: for each row, repeat its first sample to the left and
  // its last sample to the right.
  for (int row = 0; row < iHeight; ++row)
  {
    Pel *const firstInRow = ptr + iStride * row;
    Pel *const lastInRow  = firstInRow + (iWidth - 1);
    for (int dist = 1; dist <= padSize; ++dist)
    {
      *(firstInRow - dist) = *firstInRow;
      *(lastInRow + dist)  = *lastInRow;
    }
  }

  // Vertical pass: repeat the (already horizontally padded) first and last
  // rows upwards and downwards. Copying the padded width also fills the four
  // corner regions.
  const int paddedRowBytes = (iWidth + 2 * padSize) * sizeof(Pel);
  Pel *const firstRow = ptr - padSize;
  Pel *const lastRow  = ptr + (iStride * (iHeight - 1)) - padSize;
  for (int dist = 1; dist <= padSize; ++dist)
  {
    memcpy(firstRow - (dist * iStride), firstRow, paddedRowBytes);
    memcpy(lastRow + (dist * iStride), lastRow, paddedRowBytes);
  }
}
#endif
template<>
void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng, const int8_t gbiIdx)
{
......
......@@ -77,6 +77,10 @@ struct PelBufferOps
void(*calcBIOPar) (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG);
#endif
void(*calcBlkGradient)(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize);
#if JVET_M0147_DMVR
void(*copyBuffer)(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height);
void(*padding)(Pel *dst, int stride, int width, int height, int padSize);
#endif
#if ENABLE_SIMD_OPT_GBI
void ( *removeWeightHighFreq8) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight);
void ( *removeWeightHighFreq4) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight);
......@@ -90,6 +94,12 @@ extern PelBufferOps g_pelBufOP;
#endif
#endif
#if JVET_M0147_DMVR
void paddingCore(Pel *ptr, int iStride, int iWidth, int iHeight, int padSize);
void copyBufferCore(Pel *src, int srcStride, Pel *Dst, int dstStride, int width, int height);
#endif
template<typename T>
struct AreaBuf : public Size
{
......
......@@ -332,6 +332,14 @@ static const uint32_t LUMA_LEVEL_TO_DQP_LUT_MAXSIZE = 1024; ///<
#if !JVET_M0464_UNI_MTS
static const int NUM_EMT_CU_FLAG_CTX = 6; ///< number of context models for EMT CU-level flag
#endif
#if JVET_M0147_DMVR
// DMVR sub-block ("sub-CU") geometry. The sizes are derived from their log2
// values so the two can never disagree (16x16 luma samples).
static const int DMVR_SUBCU_WIDTH_LOG2 = 4;
static const int DMVR_SUBCU_HEIGHT_LOG2 = 4;
static const int DMVR_SUBCU_WIDTH = 1 << DMVR_SUBCU_WIDTH_LOG2;
static const int DMVR_SUBCU_HEIGHT = 1 << DMVR_SUBCU_HEIGHT_LOG2;
// Number of DMVR sub-CUs that fit into a MAX_CU_SIZE x MAX_CU_SIZE block;
// used as the size of the per-PU mvdL0SubPu array.
static const int MAX_NUM_SUBCU_DMVR = (MAX_CU_SIZE * MAX_CU_SIZE) >> (DMVR_SUBCU_WIDTH_LOG2 + DMVR_SUBCU_HEIGHT_LOG2);
// Sizes the SAD array as (2 * N + 1) x (2 * N + 1) entries.
// NOTE(review): presumably the number of refinement iterations / search range in samples - confirm.
static const int DMVR_NUM_ITERATION = 2;
#endif
//QTBT high level parameters
//for I slice luma CTB configuration para.
......
This diff is collapsed.
......@@ -94,6 +94,33 @@ protected:
int m_iRefListIdx;
PelStorage m_triangleBuf;
Mv* m_storedMv;
#if JVET_M0147_DMVR
/*buffers for bilinear Filter data for DMVR refinement*/
Pel* m_cYuvPredTempDMVRL0;
Pel* m_cYuvPredTempDMVRL1;
int m_biLinearBufStride;
/*buffers for padded data*/
PelUnitBuf m_cYuvRefBuffDMVRL0;
PelUnitBuf m_cYuvRefBuffDMVRL1;
Pel* m_cRefSamplesDMVRL0[MAX_NUM_COMPONENT];
Pel* m_cRefSamplesDMVRL1[MAX_NUM_COMPONENT];
// Names for the SAD sample positions around a search centre.
// Real positions are numbered consecutively from 0 (SAD_BOTTOM) to 8
// (SAD_CENTER), so SAD_COUNT evaluates to 9.
// NOTE(review): presumably these are used as indices into a per-position cost
// array - the usage is not visible in this hunk, confirm against the .cpp.
enum SAD_POINT_INDEX
{
NOT_AVAILABLE = -1, // sentinel: no valid position
SAD_BOTTOM = 0,     // the four axis-aligned neighbours ...
SAD_TOP,
SAD_RIGHT,
SAD_LEFT,
SAD_TOP_LEFT,       // ... followed by the four diagonal neighbours
SAD_TOP_RIGHT,
SAD_BOTTOM_LEFT,
SAD_BOTTOM_RIGHT,
SAD_CENTER,         // the search centre itself
SAD_COUNT           // number of positions above (9); not a position
};
Mv m_pSearchOffset[5] = { Mv(0, 1), Mv(0, -1), Mv(1, 0), Mv(-1, 0), Mv(0, 0) };
uint64_t m_SADsArray[((DMVR_NUM_ITERATION << 1) + 1) * ((DMVR_NUM_ITERATION << 1) + 1)];
#endif
Pel* m_gradX0;
Pel* m_gradY0;
......@@ -112,10 +139,22 @@ protected:
, const bool luma, const bool chroma
);
void xPredInterBi ( PredictionUnit& pu, PelUnitBuf &pcYuvPred );
#if JVET_M0147_DMVR
void xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
, const bool& bioApplied
, bool isIBC
, SizeType dmvrWidth = 0
, SizeType dmvrHeight = 0
, bool bilinearMC = false
, Pel *srcPadBuf = NULL
, int32_t srcPadStride = 0
);
#else
void xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
, const bool& bioApplied
, bool isIBC
);
#endif
void xAddBIOAvg4 (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng);
#if JVET_M0063_BDOF_FIX
......@@ -169,6 +208,16 @@ public:
#else
void weightedTriangleBlk ( PredictionUnit &pu, bool weights, const bool splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 );
#endif
#if JVET_M0147_DMVR
void xPrefetchPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId);
void xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bBIOApplied
, const Mv startMV[NUM_REF_PIC_LIST_01]
);
void xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& minCost, int16_t *delta_mv, uint64_t *pSADsArray, int width, int height);
uint64_t xDMVRCost(int iBitDepth, Pel* pRef, uint32_t uiRefStride, const Pel* pOrg, uint32_t uiOrgStride, int iWidth, int iHeight);
void xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs);
void xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, const ClpRngs &clpRngs, const bool bBIOApplied );
#endif
#if JVET_J0090_MEMORY_BANDWITH_MEASURE
void cacheAssign( CacheModel *cache );
......
......@@ -48,7 +48,10 @@
#define IF_INTERNAL_PREC 14 ///< Number of bits for internal precision
#define IF_FILTER_PREC 6 ///< Log2 of sum of filter taps
#define IF_INTERNAL_OFFS (1<<(IF_INTERNAL_PREC-1)) ///< Offset used internally
#if JVET_M0147_DMVR
#define IF_INTERNAL_PREC_BILINEAR 10 ///< Number of bits for internal precision
#define IF_FILTER_PREC_BILINEAR 4 ///< Bilinear filter coeff precision so that intermediate value will not exceed 16 bit for SIMD - bit exact
#endif
/**
* \brief Interpolation filter class
*/
......@@ -57,17 +60,36 @@ class InterpolationFilter
static const TFilterCoeff m_lumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps
static const TFilterCoeff m_chromaFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Chroma filter taps
static const TFilterCoeff m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps
#if JVET_M0147_DMVR
static const TFilterCoeff m_bilinearFilterPrec4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps
#endif
public:
template<bool isFirst, bool isLast>
#if JVET_M0147_DMVR
static void filterCopy( const ClpRng& clpRng, const Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool biMC10BitOut);
#else
static void filterCopy( const ClpRng& clpRng, const Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height );
#endif
template<int N, bool isVertical, bool isFirst, bool isLast>
#if JVET_M0147_DMVR
static void filter(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMC10BitOut);
#else
static void filter(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff);
#endif
template<int N>
#if JVET_M0147_DMVR
void filterHor(const ClpRng& clpRng, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isLast, TFilterCoeff const *coeff, bool biMC10BitOut);
#else
void filterHor(const ClpRng& clpRng, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isLast, TFilterCoeff const *coeff);
#endif
template<int N>
#if JVET_M0147_DMVR
void filterVer(const ClpRng& clpRng, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isFirst, bool isLast, TFilterCoeff const *coeff, bool biMC10BitOut);
#else
void filterVer(const ClpRng& clpRng, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isFirst, bool isLast, TFilterCoeff const *coeff);
#endif
protected:
#if JVET_J0090_MEMORY_BANDWITH_MEASURE
......@@ -76,10 +98,21 @@ protected:
public:
InterpolationFilter();
~InterpolationFilter() {}
#if JVET_M0147_DMVR
void( *m_filterHor[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMC10BitOut);
#else
void( *m_filterHor[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff );
#endif
#if JVET_M0147_DMVR
void( *m_filterVer[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMC10BitOut);
#else
void( *m_filterVer[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff );
#endif
#if JVET_M0147_DMVR
void( *m_filterCopy[2][2] ) ( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool biMC10BitOut);
#else
void( *m_filterCopy[2][2] ) ( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height );
#endif
void initInterpolationFilter( bool enable );
#ifdef TARGET_SIMD_X86
......@@ -87,9 +120,16 @@ public:
template <X86_VEXT vext>
void _initInterpolationFilterX86();
#endif
#if JVET_M0147_DMVR
void filterHor(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMC10BitOut = false);
#else
void filterHor(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0);
#endif
#if JVET_M0147_DMVR
void filterVer(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMC10BitOut = false);
#else
void filterVer(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0);
#endif
#if JVET_J0090_MEMORY_BANDWITH_MEASURE
void cacheAssign( CacheModel *cache ) { m_cacheModel = cache; }
#endif
......
......@@ -336,12 +336,16 @@ void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY,
rcDP.cur.stride = iRefStride;
rcDP.cur.width = width;
rcDP.cur.height = height;
#if JVET_M0147_DMVR
rcDP.subShift = subShiftMode;
#endif
rcDP.step = step;
rcDP.maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max();
#if JVET_M0147_DMVR
CHECK( useHadamard || rcDP.useMR, "only used in xDMVRCost with these default parameters (so far...)" );
#else
CHECK( useHadamard || rcDP.useMR || subShiftMode > 0, "only used in xDirectMCCost with these default parameters (so far...)" );
#endif
if ( bioApplied )
{
rcDP.distFunc = m_afpDistortFunc[ DF_SAD_INTERMEDIATE_BITDEPTH ];
......
......@@ -1810,6 +1810,9 @@ SPSNext::SPSNext( SPS& sps )
, m_MTTEnabled ( false )
, m_MHIntra ( false )
, m_Triangle ( false )
#if JVET_M0147_DMVR
, m_DMVR ( false )
#endif
#if ENABLE_WPP_PARALLELISM
, m_NextDQP ( false )
#endif
......
......@@ -855,6 +855,9 @@ private:
bool m_MTTEnabled; //
bool m_MHIntra;
bool m_Triangle;
#if JVET_M0147_DMVR
bool m_DMVR;
#endif
#if ENABLE_WPP_PARALLELISM
bool m_NextDQP;
#endif
......@@ -896,6 +899,10 @@ public:
bool getUseAffine () const { return m_Affine; }
void setUseAffineType ( bool b ) { m_AffineType = b; }
bool getUseAffineType () const { return m_AffineType; }
#if JVET_M0147_DMVR
// Stores the DMVR (decoder-side motion vector refinement) enable flag in m_DMVR.
void setUseDMVR(bool b) { m_DMVR = b; }
// Returns the DMVR enable flag (m_DMVR); PU::checkDMVRCondition() reads it to decide whether DMVR may apply to a PU.
bool getUseDMVR() const { return m_DMVR; }
#endif
void setDisableMotCompress ( bool b ) { m_DisableMotionCompression = b; }
bool getDisableMotCompress () const { return m_DisableMotionCompression; }
bool getMTTEnabled () const { return m_MTTEnabled; }
......
......@@ -113,6 +113,8 @@
#define JVET_M0255_FRACMMVD_SWITCH 1 // disable fractional MVD in MMVD adaptively
#define JVET_M0823_MMVD_ENCOPT 1 // encoder optimization for MMVD
#define JVET_M0147_DMVR 1 //Decoder side Motion Vector Refinement
#if JVET_M0464_UNI_MTS
typedef std::pair<int, bool> TrMode;
typedef std::pair<int, int> TrCost;
......
......@@ -354,6 +354,13 @@ void PredictionUnit::initData()
mergeType = MRG_TYPE_DEFAULT_N;
bv.setZero();
bvd.setZero();
#if JVET_M0147_DMVR
mvRefine = false;
for (uint32_t i = 0; i < MAX_NUM_SUBCU_DMVR; i++)
{
mvdL0SubPu[i].setZero();
}
#endif
for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++)
{
mvpIdx[i] = MAX_UCHAR;
......@@ -407,6 +414,13 @@ PredictionUnit& PredictionUnit::operator=(const InterPredictionData& predData)
mergeType = predData.mergeType;
bv = predData.bv;
bvd = predData.bvd;
#if JVET_M0147_DMVR
mvRefine = predData.mvRefine;
for (uint32_t i = 0; i < MAX_NUM_SUBCU_DMVR; i++)
{
mvdL0SubPu[i] = predData.mvdL0SubPu[i];
}
#endif
for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++)
{
mvpIdx[i] = predData.mvpIdx[i];
......@@ -452,6 +466,13 @@ PredictionUnit& PredictionUnit::operator=( const PredictionUnit& other )
mergeType = other.mergeType;
bv = other.bv;
bvd = other.bvd;
#if JVET_M0147_DMVR
mvRefine = other.mvRefine;
for (uint32_t i = 0; i < MAX_NUM_SUBCU_DMVR; i++)
{
mvdL0SubPu[i] = other.mvdL0SubPu[i];
}
#endif
for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++)
{
mvpIdx[i] = other.mvpIdx[i];
......
......@@ -372,6 +372,10 @@ struct InterPredictionData
Mv mv [NUM_REF_PIC_LIST_01];
int16_t refIdx [NUM_REF_PIC_LIST_01];
MergeType mergeType;
#if JVET_M0147_DMVR
bool mvRefine;
Mv mvdL0SubPu[MAX_NUM_SUBCU_DMVR];
#endif
Mv mvdAffi [NUM_REF_PIC_LIST_01][3];
Mv mvAffi[NUM_REF_PIC_LIST_01][3];
bool mhIntraFlag;
......
......@@ -65,7 +65,39 @@ UnitArea CS::getArea( const CodingStructure &cs, const UnitArea &area, const Cha
{
return isDualITree( cs ) ? area.singleChan( chType ) : area;
}
#if JVET_M0147_DMVR
/// Writes the DMVR-refined motion vectors of every eligible PU in \p cs back
/// into the motion field.
///
/// For each PU that passes PU::checkDMVRCondition(), the PU is traversed in
/// raster order in sub-blocks of at most DMVR_SUBCU_WIDTH x DMVR_SUBCU_HEIGHT
/// luma samples. For sub-block number `num` the stored delta
/// pu.mvdL0SubPu[num] is added to the L0 motion vector and subtracted from the
/// L1 motion vector, the result is spanned over the sub-block area with
/// PU::spanMotionInfo(), and the stored delta is reset to zero.
///
/// PUs that do not use DMVR are left untouched. The scratch PU copy and the
/// sub-block geometry are only set up for PUs that need them, because a
/// PredictionUnit copy includes the whole mvdL0SubPu array.
void CS::setRefinedMotionField(CodingStructure &cs)
{
  for (CodingUnit *cu : cs.cus)
  {
    for (auto &pu : CU::traversePUs(*cu))
    {
      if (!PU::checkDMVRCondition(pu))
      {
        continue;
      }

      // Scratch PU: same prediction data as pu, but re-targeted to one
      // sub-block at a time so spanMotionInfo() only covers that area.
      PredictionUnit subPu = pu;

      const int      dy    = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
      const int      dx    = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH);
      const Position puPos = pu.lumaPos();
      // Loop bounds keep the type of the original expressions (position + size).
      const auto     yEnd  = puPos.y + pu.lumaSize().height;
      const auto     xEnd  = puPos.x + pu.lumaSize().width;

      int num = 0;   // raster-scan index of the current sub-block inside the PU
      for (int y = puPos.y; y < yEnd; y += dy)
      {
        for (int x = puPos.x; x < xEnd; x += dx)
        {
          subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy)));
          subPu.mv[0] = pu.mv[0];
          subPu.mv[1] = pu.mv[1];
          // The refinement is mirrored: +delta on list 0, -delta on list 1.
          subPu.mv[REF_PIC_LIST_0] += pu.mvdL0SubPu[num];
          subPu.mv[REF_PIC_LIST_1] -= pu.mvdL0SubPu[num];
          // The delta has been consumed; clear it in the PU.
          pu.mvdL0SubPu[num].setZero();
          num++;
          PU::spanMotionInfo(subPu);
        }
      }
    }
  }
}
#endif
// CU tools
bool CU::isIntra(const CodingUnit &cu)
......@@ -1426,6 +1458,27 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
}
mrgCtx.numValidMergeCand = uiArrayAddr;
}
#if JVET_M0147_DMVR
// Tells whether DMVR may be applied to the given PU.
// Returns true only if DMVR is enabled in the SPS and the PU
//  - is coded in regular merge mode (merge flag set, default merge type),
//  - is neither affine nor MMVD (merge or skip),
//  - is bi-predicted from one reference on each temporal side at equal POC distance,
//  - has a luma height of at least 8 and a luma area of at least 64 samples.
bool PU::checkDMVRCondition(const PredictionUnit& pu)
{
  if (!pu.cs->sps->getSpsNext().getUseDMVR())
  {
    return false;
  }

  // The checks below run in the same order, with the same short-circuiting,
  // as a single && chain would.
  if (!pu.mergeFlag || pu.mergeType != MRG_TYPE_DEFAULT_N)
  {
    return false;
  }
  if (pu.cu->affine || pu.mmvdMergeFlag || pu.cu->mmvdSkip)
  {
    return false;
  }
  if (!PU::isBiPredFromDifferentDirEqDistPoc(pu))
  {
    return false;
  }
  return (pu.lheight() >= 8) && ((pu.lheight() * pu.lwidth()) >= 64);
}
#endif
// for ibc pu validation
bool PU::isBlockVectorValid(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xStartInCU, int yStartInCU, int xBv, int yBv, int ctuSize)
{
......@@ -3820,7 +3873,25 @@ bool PU::isBiPredFromDifferentDir( const PredictionUnit& pu )
return false;
}
#if JVET_M0147_DMVR
// Returns true if the PU is bi-predicted and its two reference pictures lie on
// opposite temporal sides of the current picture at the same absolute POC
// distance (one in the past, one in the future, equally far away).
bool PU::isBiPredFromDifferentDirEqDistPoc(const PredictionUnit& pu)
{
  // Both lists must carry a valid reference index.
  if (pu.refIdx[0] < 0 || pu.refIdx[1] < 0)
  {
    return false;
  }

  const int refPocL0 = pu.cu->slice->getRefPOC(REF_PIC_LIST_0, pu.refIdx[0]);
  const int refPocL1 = pu.cu->slice->getRefPOC(REF_PIC_LIST_1, pu.refIdx[1]);
  const int curPoc   = pu.cu->slice->getPOC();

  const int distL0 = curPoc - refPocL0;
  const int distL1 = curPoc - refPocL1;

  // A negative product means the two distances have opposite signs.
  const bool oppositeSides = (distL0 * distL1) < 0;
  return oppositeSides && (abs(distL0) == abs(distL1));
}
#endif
void PU::restrictBiPredMergeCands( const PredictionUnit &pu, MergeCtx& mergeCtx )
{
if( PU::isBipredRestriction( pu ) )
......
......@@ -49,6 +49,9 @@ namespace CS
uint64_t getEstBits ( const CodingStructure &cs );
UnitArea getArea ( const CodingStructure &cs, const UnitArea &area, const ChannelType chType );
bool isDualITree ( const CodingStructure &cs );
#if JVET_M0147_DMVR
void setRefinedMotionField(CodingStructure &cs);
#endif
}
......@@ -151,6 +154,9 @@ namespace PU
);
bool getInterMergeSubPuRecurCand(const PredictionUnit &pu, MergeCtx &mrgCtx, const int count);
bool isBiPredFromDifferentDir (const PredictionUnit &pu);
#if JVET_M0147_DMVR
bool isBiPredFromDifferentDirEqDistPoc(const PredictionUnit &pu);
#endif
void restrictBiPredMergeCands (const PredictionUnit &pu, MergeCtx& mrgCtx);
#if JVET_M0068_M0171_MMVD_CLEANUP
void restrictBiPredMergeCandsOne (PredictionUnit &pu);
......@@ -175,6 +181,9 @@ namespace PU
void getIbcMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred);
bool getDerivedBV(PredictionUnit &pu, const Mv& currentMv, Mv& derivedMv);
bool isBlockVectorValid(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xStartInCU, int yStartInCU, int xBv, int yBv, int ctuSize);
#if JVET_M0147_DMVR
bool checkDMVRCondition(const PredictionUnit& pu);
#endif
}
// TU tools
......
......@@ -128,6 +128,125 @@ void addAvg_SSE( const int16_t* src0, int src0Stride, const int16_t* src1, int s
}
}
#if JVET_M0147_DMVR
// SIMD block copy: transfers `height` rows of `width` samples from src to dst
// using unaligned loads/stores. Strides are in samples.
// Each row is consumed in chunks of 16 samples (256-bit, only when USE_AVX2 is
// defined), then 8 samples (128-bit), then 4 samples (64-bit), and finally one
// sample at a time. The chunk sizes imply a 16-bit Pel.
template<X86_VEXT vext>
void copyBufferSimd(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height)
{
  for (int row = 0; row < height; row++)
  {
    const Pel *srcRow = &src[row * srcStride];
    Pel       *dstRow = &dst[row * dstStride];
    int        col    = 0;   // next sample of this row that still has to be copied

#ifdef USE_AVX2
    for (; width - col >= 16; col += 16)
    {
      const __m256i v16 = _mm256_loadu_si256((const __m256i*)(srcRow + col));
      _mm256_storeu_si256((__m256i*)(dstRow + col), v16);
    }
#endif
    for (; width - col >= 8; col += 8)
    {
      const __m128i v8 = _mm_loadu_si128((const __m128i*)(srcRow + col));
      _mm_storeu_si128((__m128i*)(dstRow + col), v8);
    }
    for (; width - col >= 4; col += 4)
    {
      // Only the low 64 bits are loaded and stored.
      const __m128i v4 = _mm_loadl_epi64((const __m128i*)(srcRow + col));
      _mm_storel_epi64((__m128i*)(dstRow + col), v4);
    }
    // Scalar tail for the last (width % 4) samples.
    for (; col < width; col++)
    {
      dstRow[col] = srcRow[col];
    }
  }
}
template<X86_VEXT vext>
void paddingSimd(Pel *dst, int stride, int width, int height, int padSize)
{
__m128i x;
#ifdef USE_AVX2
__m256i x16;
#endif
int temp, j;
for (int i = 1; i <= padSize; i++)
{
j = 0;
temp = width;
#ifdef USE_AVX2
while ((temp >> 4) > 0)
{
x16 = _mm256_loadu_si256((const __m256i*)(&(dst[j])));
_mm256_storeu_si256((__m256i*)(dst + j - i*stride), x16);
x16 = _mm256_loadu_si256((const __m256i*)(dst + j + (height - 1)*stride));
_mm256_storeu_si256((__m256i*)(dst + j + (height - 1 + i)*stride), x16);
j = j + 16;
temp = temp - 16;
}
#endif
while ((temp >> 3) > 0)
{
x = _mm_loadu_si128((const __m128i*)(&(dst[j])));
_mm_storeu_si128((__m128i*)(dst + j - i*stride), x);
x = _mm_loadu_si128((const __m128i*)(dst + j + (height - 1)*stride));
_mm_storeu_si128((__m128i*)(dst + j + (height - 1 + i)*stride), x);
j = j + 8;
temp = temp - 8;
}
while ((temp >> 2) > 0)
{
x = _mm_loadl_epi64((const __m128i*)(&dst[j]));
_mm_storel_epi64((__m128i*)(dst + j - i*stride), x);
x = _mm_loadl_epi64((const __m128i*)(dst + j + (height - 1)*stride));
_mm_storel_epi64((__m128i*)(dst + j + (height - 1 + i)*stride), x);
j = j + 4;
temp = temp - 4;
}
while (temp > 0)
{
dst[j - i*stride] = dst[j];
dst[j + (height - 1 + i)*stride] = dst[j + (height - 1)*stride];
j++;
temp--;
}
}
//Left and Right Padding
Pel* ptr1 = dst - padSize*stride;
Pel* ptr2 = dst - padSize*stride + width - 1;
int offset = 0;
for (int i = 0; i < height + 2 * padSize; i++)
{
offset = stride * i;
for (int j = 1; j <= padSize; j++)
{
*(ptr1 - j + offset) = *(ptr1 + offset);
*(ptr2 + j + offset) = *(ptr2 + offset);
}