diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index 59e832da9243a6d2aaaf72adb89f13d7167b58e9..66328ec040558146ec129bca091b440e70ba6a84 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -420,6 +420,10 @@ void EncApp::xInitLibCfg() m_cEncLib.setNoTimdConstraintFlag(m_noTimdConstraintFlag); CHECK(m_noTimdConstraintFlag && m_timd, "TIMD shall be deactivated when m_noTimdConstraintFlag is equal to 1"); #endif +#if JVET_AB0155_SGPM + m_cEncLib.setNoSgpmConstraintFlag(m_noSgpmConstraintFlag); + CHECK(m_noSgpmConstraintFlag && m_sgpm, "SGPM shall be deactivated when m_noSgpmConstraintFlag is equal to 1"); +#endif #if ENABLE_OBMC m_cEncLib.setNoObmcConstraintFlag(m_noObmcConstraintFlag); CHECK(m_noObmcConstraintFlag && m_OBMC, "OBMC shall be deactivated when m_noObmcConstraintFlag is equal to 1"); @@ -557,6 +561,9 @@ void EncApp::xInitLibCfg() #if JVET_W0123_TIMD_FUSION m_cEncLib.setNoTimdConstraintFlag(false); #endif +#if JVET_AB0155_SGPM + m_cEncLib.setNoSgpmConstraintFlag(false); +#endif #if ENABLE_OBMC m_cEncLib.setNoObmcConstraintFlag(false); #endif @@ -847,6 +854,9 @@ void EncApp::xInitLibCfg() #if JVET_W0123_TIMD_FUSION m_cEncLib.setUseTimd ( m_timd ); #endif +#if JVET_AB0155_SGPM + m_cEncLib.setUseSgpm ( m_sgpm ); +#endif #if ENABLE_OBMC m_cEncLib.setUseObmc ( m_OBMC ); #endif diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index ae45ca9689c7ecc05ad842e253799bfaa8fa2088..3da2ca3515bfcb168f10cb4b7a222a553dfa3703 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -920,6 +920,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) #if JVET_W0123_TIMD_FUSION ("NoTimdConstraintFlag", m_noTimdConstraintFlag, false, "Indicate that TIMD is deactivated") #endif +#if JVET_AB0155_SGPM + ("NoSgpmConstraintFlag", m_noSgpmConstraintFlag, false, "Indicate that SGPM is deactivated") +#endif #if ENABLE_OBMC 
("NoObmcConstraintFlag", m_noObmcConstraintFlag, false, "Indicate that OBMC is deactivated") #endif @@ -1096,6 +1099,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) #if JVET_W0123_TIMD_FUSION ( "TIMD", m_timd, true, "Enable template based intra mode derivation\n" ) #endif +#if JVET_AB0155_SGPM + ( "SGPM", m_sgpm, true, "Enable spatial geometric partitioning mode\n" ) +#endif #if ENABLE_OBMC ("OBMC", m_OBMC, true, "Overlapping Block Motion Compensation") #endif @@ -5209,6 +5215,9 @@ void EncAppCfg::xPrintParameter() #if JVET_W0123_TIMD_FUSION msg( VERBOSE, "TIMD:%d ", m_timd ); #endif +#if JVET_AB0155_SGPM + msg(VERBOSE, "SGPM:%d ", m_sgpm); +#endif #if JVET_V0130_INTRA_TMP msg( VERBOSE, "IntraTMP:%d ", m_intraTMP ); msg( VERBOSE, "IntraTmpMaxSize:%d ", m_intraTmpMaxSize ); diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index 157621de4960bb70c66e08d2ce5981fd0d512356..c679eebc751e3e50bc7a7e237467b888173b74a8 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -208,6 +208,9 @@ protected: #if JVET_W0123_TIMD_FUSION bool m_noTimdConstraintFlag; #endif +#if JVET_AB0155_SGPM + bool m_noSgpmConstraintFlag; +#endif #if ENABLE_OBMC bool m_noObmcConstraintFlag; #endif @@ -457,6 +460,9 @@ protected: #if JVET_W0123_TIMD_FUSION bool m_timd; #endif +#if JVET_AB0155_SGPM + bool m_sgpm; +#endif #if ENABLE_OBMC bool m_OBMC; #endif diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index e659e43397c33bff3d1cae7bc5fa3ad3aa6c7508..8e00e1fbc7a0b32cf1937f5d40a583d8c9c24a61 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -473,6 +473,9 @@ static const int LM_CHROMA_IDX = NUM_LUMA_MODE; ///< chroma mode index for deriv #if ENABLE_DIMD static const int DIMD_IDX = 99; ///< index for intra DIMD mode #endif +#if JVET_AB0155_SGPM +static const int SGPM_IDX = 200; ///< index for SGPM mode +#endif #if JVET_W0123_TIMD_FUSION static const int 
TIMD_IDX = 199; ///< index for intra TIMD mode static const int DIMD_MAX_TEMP_SIZE = 4; @@ -548,7 +551,13 @@ static const int NUM_MOST_PROBABLE_MODES = 6; static const int LM_SYMBOL_NUM = (1 + NUM_LMC_MODE); static const int MAX_NUM_MIP_MODE = 32; ///< maximum number of MIP pred. modes +#if JVET_AB0155_SGPM +static const int SGPM_NUM = 16; +static const int FAST_UDI_MAX_RDMODE_NUM = (NUM_LUMA_MODE + MAX_NUM_MIP_MODE + SGPM_NUM); ///< maximum number of RD comparison in fast-UDI estimation loop +#else + static const int FAST_UDI_MAX_RDMODE_NUM = (NUM_LUMA_MODE + MAX_NUM_MIP_MODE); ///< maximum number of RD comparison in fast-UDI estimation loop +#endif static const int MAX_LFNST_COEF_NUM = 16; @@ -1026,13 +1035,28 @@ static const int GEO_NUM_ANGLES = 32; static const int GEO_NUM_DISTANCES = 4; static const int GEO_NUM_PRESTORED_MASK = 6; static const int GEO_WEIGHT_MASK_SIZE = 3 * (GEO_MAX_CU_SIZE >> 3) * 2 + GEO_MAX_CU_SIZE; -#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING + +#if JVET_AB0155_SGPM +static const int GEO_MIN_CU_LOG2_EX = 2; +static const int GEO_MAX_CU_LOG2_EX = 6; +static const int GEO_MIN_CU_SIZE_EX = 1 << GEO_MIN_CU_LOG2_EX; +static const int GEO_MAX_CU_SIZE_EX = 1 << GEO_MAX_CU_LOG2_EX; +static const int GEO_NUM_CU_SIZE_EX = (GEO_MAX_CU_LOG2_EX - GEO_MIN_CU_LOG2_EX) + 1; + +static const int SGPM_MIN_PIX = 32; +static const int SGPM_NUM_MPM = 3; +static const int SGPM_TEMPLATE_SIZE = 1; +#endif + +#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_AB0155_SGPM #if !JVET_W0090_ARMC_TM static const int AML_MERGE_TEMPLATE_SIZE = 1; #endif static const int GEO_MODE_SEL_TM_SIZE = AML_MERGE_TEMPLATE_SIZE; static const int GEO_TM_ADDED_WEIGHT_MASK_SIZE = GEO_MODE_SEL_TM_SIZE; static const int GEO_WEIGHT_MASK_SIZE_EXT = GEO_WEIGHT_MASK_SIZE + GEO_TM_ADDED_WEIGHT_MASK_SIZE * 2; +#endif +#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING static const int GEO_SPLIT_MODE_RICE_CODE_DIVISOR = 4; static const int GEO_MODE_COMPRESSION_RATIO = 2; static const int GEO_NUM_SIG_PARTMODE = 
GEO_NUM_PARTITION_MODE / GEO_MODE_COMPRESSION_RATIO; ///< max number of splitting modes for signaling @@ -1060,6 +1084,11 @@ static const int GEO_MAX_TRY_WEIGHTED_SATD = 8; #if JVET_AA0058_GPM_ADP_BLD static const int GEO_NUM_BLD = 5; #endif +#if JVET_AB0155_SGPM +static const int TOTAL_GEO_NUM_BLD = 6; // GPM 0~4, SGPM 1~5 +#define GET_SGPM_BLD_IDX(a, b) \ + (std::min(a, b) <= 4 ? 1 : std::min(a, b) <= 8 ? 2 : std::min(a, b) <= 16 ? 3 : std::min(a, b) <= 32 ? 4 : 5) +#endif #if ENABLE_OBMC static const unsigned int defaultWeight[2][4] = { {27, 16, 6, 0}, {27, 0, 0, 0} }; diff --git a/source/Lib/CommonLib/ContextModelling.cpp b/source/Lib/CommonLib/ContextModelling.cpp index bb791a5c9f500600cfa57be82e8ca009706c8ece..132a334d42cc01769e41d7a5f43c32302e6f49a9 100644 --- a/source/Lib/CommonLib/ContextModelling.cpp +++ b/source/Lib/CommonLib/ContextModelling.cpp @@ -402,6 +402,19 @@ unsigned DeriveCtx::CtxTimdFlag(const CodingUnit& cu) } #endif +#if JVET_AB0155_SGPM +unsigned DeriveCtx::CtxSgpmFlag(const CodingUnit &cu) +{ + const CodingStructure *cs = cu.cs; + unsigned ctxId = 0; + const CodingUnit * cuLeft = cs->getCURestricted(cu.lumaPos().offset(-1, 0), cu, CH_L); + ctxId = (cuLeft && cuLeft->sgpm) ? 1 : 0; + const CodingUnit *cuAbove = cs->getCURestricted(cu.lumaPos().offset(0, -1), cu, CH_L); + ctxId += (cuAbove && cuAbove->sgpm) ? 
1 : 0; + return ctxId; +} +#endif + unsigned DeriveCtx::CtxPredModeFlag( const CodingUnit& cu ) { const CodingUnit *cuLeft = cu.cs->getCURestricted(cu.lumaPos().offset(-1, 0), cu, CH_L); diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h index 81dd14e66c861f16e4958a857f931197404d899b..db3638d8952c44c4f1ee8cfd72bc9fd81dc72df3 100644 --- a/source/Lib/CommonLib/ContextModelling.h +++ b/source/Lib/CommonLib/ContextModelling.h @@ -717,6 +717,9 @@ unsigned CtxDIMDFlag(const CodingUnit& cu); #if JVET_W0123_TIMD_FUSION unsigned CtxTimdFlag( const CodingUnit& cu ); #endif +#if JVET_AB0155_SGPM +unsigned CtxSgpmFlag(const CodingUnit &cu); +#endif } #endif // __CONTEXTMODELLING__ diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp index 78339e8f0548bc56d7218c4afa4a471083f3d5f9..27a6b536de8d8c239f61eb974e30e30edea53543 100644 --- a/source/Lib/CommonLib/Contexts.cpp +++ b/source/Lib/CommonLib/Contexts.cpp @@ -2682,6 +2682,22 @@ const CtxSet ContextSetCfg::TimdFlag = ContextSetCfg::addCtxSet }); #endif +#if JVET_AB0155_SGPM +const CtxSet ContextSetCfg::SgpmFlag = ContextSetCfg::addCtxSet +({ + { 41, 34, 42, }, + { 34, 34, 34, }, + { 42, 50, 58, }, + { 6, 6, 6, }, + { 7, 7, 5, }, + { 6, 6, 2, }, + { 11, 11, 18, }, + { 4, 4, 4, }, + { 11, 11, 11, }, + { 124, 126, 126, }, + { 126, 124, 117, }, +}); +#endif #if ENABLE_OBMC const CtxSet ContextSetCfg::ObmcFlag = ContextSetCfg::addCtxSet ({ diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h index 19352e5410fe0c69b4bccf976d185514aa859472..208c874fd639f755498e6523003a9bb13127894c 100644 --- a/source/Lib/CommonLib/Contexts.h +++ b/source/Lib/CommonLib/Contexts.h @@ -487,6 +487,9 @@ public: #if JVET_W0123_TIMD_FUSION static const CtxSet TimdFlag; #endif +#if JVET_AB0155_SGPM + static const CtxSet SgpmFlag; +#endif #if ENABLE_OBMC static const CtxSet ObmcFlag; #endif diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp 
b/source/Lib/CommonLib/InterpolationFilter.cpp index cb9a2cd501953ce89cf0ba652cc396fbc2c7b701..7adc95bb18781d49de392133d069eac01bad2aff 100644 --- a/source/Lib/CommonLib/InterpolationFilter.cpp +++ b/source/Lib/CommonLib/InterpolationFilter.cpp @@ -1283,6 +1283,11 @@ InterpolationFilter::InterpolationFilter() #if JVET_Y0065_GPM_INTRA m_weightedGeoBlkRounded = xWeightedGeoBlkRounded; #endif +#if JVET_AB0155_SGPM + m_weightedSgpm = xWeightedSgpm; + m_sadTM = xSadTM; + m_sgpmSadTM = xSgpmSadTM; +#endif #endif #if JVET_Z0056_GPM_SPLIT_MODE_REORDERING m_weightedGeoTplA = xWeightedGeoTpl<true>; @@ -2145,6 +2150,209 @@ void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, in } } +#if JVET_AB0155_SGPM +int InterpolationFilter::xSadTM(const PredictionUnit &pu, const int width, const int height, const int templateWidth, + const int templateHeight, const ComponentID compIdx, PelBuf &predBuf, PelBuf &recBuf, + PelBuf &adBuf) +{ + int sad = 0; + int32_t iPredStride = predBuf.stride; + int32_t iRecStride = recBuf.stride; + int32_t iAdStride = adBuf.stride; + + // top template + Pel *piPred = predBuf.buf + templateWidth; + // start point of predBuf is (-templateWidth, -templateHeight) of current block + Pel *piAd = adBuf.buf + templateWidth; + Pel *piRec = recBuf.buf - templateHeight * iRecStride; // start point of recBuf is (0,0) of current block + + for (int y = 0; y < templateHeight; y++) + { + for (int x = 0; x < width; x++) + { + *piAd = abs(*piRec - *piPred); + sad += *piAd; + piRec++; + piPred++; + piAd++; + } + piPred += (iPredStride - width); + piAd += (iAdStride - width); + piRec += (iRecStride - width); + } + // left template + piPred = predBuf.buf + templateHeight * iPredStride; + // start point of predBuf is (-templateWidth, -templateHeight) of current block + piAd = adBuf.buf + templateHeight * iAdStride; + piRec = recBuf.buf - templateWidth; // start point of recBuf is (0,0) of current block + + for (int y = 0; y < height; y++) + { + for 
(int x = 0; x < templateWidth; x++) + { + *piAd = abs(*piRec - *piPred); + sad += *piAd; + piRec++; + piPred++; + piAd++; + } + piPred += (iPredStride - templateWidth); + piAd += (iAdStride - templateWidth); + piRec += (iRecStride - templateWidth); + + } + return sad; +} + +int InterpolationFilter::xSgpmSadTM(const PredictionUnit &pu, const int width, const int height, + const int templateWidth, const int templateHeight, const ComponentID compIdx, + const uint8_t splitDir, PelBuf &adBuf) +{ + int16_t angle = g_GeoParams[splitDir][0]; + int16_t wIdx = floorLog2(pu.lwidth()) - GEO_MIN_CU_LOG2_EX; + int16_t hIdx = floorLog2(pu.lheight()) - GEO_MIN_CU_LOG2_EX; + int16_t stepX = 1; + int maskStride = 0; + int16_t *weight = nullptr; + + if (g_angle2mirror[angle] == 2) + { + stepX = 1; + maskStride = -GEO_WEIGHT_MASK_SIZE_EXT; + weight = &g_globalGeoWeightsTpl[g_angle2mask[angle]] + [(GEO_WEIGHT_MASK_SIZE_EXT - 1 - g_weightOffsetEx[splitDir][hIdx][wIdx][1] + - GEO_TM_ADDED_WEIGHT_MASK_SIZE) + * GEO_WEIGHT_MASK_SIZE_EXT + + g_weightOffsetEx[splitDir][hIdx][wIdx][0] + GEO_TM_ADDED_WEIGHT_MASK_SIZE]; + } + else if (g_angle2mirror[angle] == 1) + { + stepX = -1; + maskStride = GEO_WEIGHT_MASK_SIZE_EXT; + weight = &g_globalGeoWeightsTpl[g_angle2mask[angle]] + [(g_weightOffsetEx[splitDir][hIdx][wIdx][1] + GEO_TM_ADDED_WEIGHT_MASK_SIZE) + * GEO_WEIGHT_MASK_SIZE_EXT + + (GEO_WEIGHT_MASK_SIZE_EXT - 1 - g_weightOffsetEx[splitDir][hIdx][wIdx][0] + - GEO_TM_ADDED_WEIGHT_MASK_SIZE)]; + } + else + { + stepX = 1; + maskStride = GEO_WEIGHT_MASK_SIZE_EXT; + weight = &g_globalGeoWeightsTpl[g_angle2mask[angle]] + [(g_weightOffsetEx[splitDir][hIdx][wIdx][1] + GEO_TM_ADDED_WEIGHT_MASK_SIZE) + * GEO_WEIGHT_MASK_SIZE_EXT + + g_weightOffsetEx[splitDir][hIdx][wIdx][0] + GEO_TM_ADDED_WEIGHT_MASK_SIZE]; + } + + int32_t iAdStride = adBuf.stride; + + // top template + Pel *piAd = adBuf.buf + templateWidth; // start point of adBuf is (-templateWidth, -templateHeight) of current block + Pel *weightTmp = 
weight - templateHeight * maskStride; + + int sum = 0; + + + for (int y = 0; y < templateHeight; y++) + { + for (int x = 0; x < width; x++) + { + sum += *piAd * (*weightTmp); + piAd++; + weightTmp += stepX; + } + piAd += (iAdStride - width); + weightTmp += (maskStride - width * stepX); + } + + // left template + piAd = adBuf.buf + templateHeight * iAdStride; + // start point of predBuf is (-templateWidth, -templateHeight) of current block + weightTmp = weight - templateWidth * stepX; + + for (int y = 0; y < height; y++) + { + for (int x = 0; x < templateWidth; x++) + { + sum += *piAd * (*weightTmp); + piAd++; + weightTmp += stepX; + } + piAd += (iAdStride - templateWidth); + weightTmp += (maskStride - templateWidth * stepX); + } + return sum; +} + +void InterpolationFilter::xWeightedSgpm(const PredictionUnit &pu, const uint32_t width, const uint32_t height, + const ComponentID compIdx, const uint8_t splitDir, PelBuf &predDst, + PelBuf &predSrc0, PelBuf &predSrc1) +{ + Pel * dst = predDst.buf; + Pel * src0 = predSrc0.buf; + Pel * src1 = predSrc1.buf; + int32_t strideDst = predDst.stride - width; + int32_t strideSrc0 = predSrc0.stride - width; + int32_t strideSrc1 = predSrc1.stride - width; + + const ClpRng clipRng = pu.cu->slice->clpRngs().comp[compIdx]; + + const int32_t shiftWeighted = 5; + const int32_t offsetWeighted = 16; + const uint32_t scaleX = getComponentScaleX(compIdx, pu.chromaFormat); + const uint32_t scaleY = getComponentScaleY(compIdx, pu.chromaFormat); + + int16_t angle = g_GeoParams[splitDir][0]; + int16_t wIdx = floorLog2(pu.lwidth()) - GEO_MIN_CU_LOG2_EX; + int16_t hIdx = floorLog2(pu.lheight()) - GEO_MIN_CU_LOG2_EX; + int16_t stepX = 1 << scaleX; + int16_t stepY = 0; + int16_t *weight = nullptr; + + if (g_angle2mirror[angle] == 2) + { + stepY = -(int) ((GEO_WEIGHT_MASK_SIZE << scaleY) + pu.lwidth()); + weight = &g_globalGeoWeights + [GET_SGPM_BLD_IDX(pu.lwidth(), pu.lheight())] + [g_angle2mask[angle]] + [(GEO_WEIGHT_MASK_SIZE - 1 - 
g_weightOffsetEx[splitDir][hIdx][wIdx][1]) + * GEO_WEIGHT_MASK_SIZE + + g_weightOffsetEx[splitDir][hIdx][wIdx][0]]; + } + else if (g_angle2mirror[angle] == 1) + { + stepX = -1 << scaleX; + stepY = (GEO_WEIGHT_MASK_SIZE << scaleY) + pu.lwidth(); + weight = &g_globalGeoWeights + [GET_SGPM_BLD_IDX(pu.lwidth(), pu.lheight())] + [g_angle2mask[angle]] + [g_weightOffsetEx[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + + (GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffsetEx[splitDir][hIdx][wIdx][0])]; + } + else + { + stepY = (GEO_WEIGHT_MASK_SIZE << scaleY) - pu.lwidth(); + weight = &g_globalGeoWeights + [GET_SGPM_BLD_IDX(pu.lwidth(), pu.lheight())] + [g_angle2mask[angle]] + [g_weightOffsetEx[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + + g_weightOffsetEx[splitDir][hIdx][wIdx][0]]; + } + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + *dst++ = ClipPel(rightShift((*weight * (*src0++) + ((32 - *weight) * (*src1++)) + offsetWeighted), shiftWeighted), clipRng); + weight += stepX; + } + dst += strideDst; + src0 += strideSrc0; + src1 += strideSrc1; + weight += stepY; + } +} +#endif #if JVET_Z0056_GPM_SPLIT_MODE_REORDERING template <bool trueTFalseL> void InterpolationFilter::xWeightedGeoTpl(const PredictionUnit &pu, const uint8_t splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1) diff --git a/source/Lib/CommonLib/InterpolationFilter.h b/source/Lib/CommonLib/InterpolationFilter.h index ed2c33291aeb35a4892f741989627b6a68c98c63..57c3d879b5bf4360c61d097a34e7677fc7972c23 100644 --- a/source/Lib/CommonLib/InterpolationFilter.h +++ b/source/Lib/CommonLib/InterpolationFilter.h @@ -212,6 +212,25 @@ public: #else void( *m_weightedGeoBlkRounded )(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const uint8_t splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1); #endif +#endif +#if JVET_AB0155_SGPM + void (*m_weightedSgpm)(const PredictionUnit &pu, const uint32_t 
width, const uint32_t height, + const ComponentID compIdx, const uint8_t splitDir, PelBuf &predDst, PelBuf &predSrc0, + PelBuf &predSrc1); + static void xWeightedSgpm(const PredictionUnit &pu, const uint32_t width, const uint32_t height, + const ComponentID compIdx, const uint8_t splitDir, PelBuf &predDst, PelBuf &predSrc0, + PelBuf &predSrc1); + + int (*m_sadTM)(const PredictionUnit &pu, const int width, const int height, const int templateWidth, + const int templateHeight, const ComponentID compIdx, PelBuf &predBuf, PelBuf &recBuf, PelBuf &adBuf); + static int xSadTM(const PredictionUnit &pu, const int width, const int height, const int templateWidth, + const int templateHeight, const ComponentID compIdx, PelBuf &predBuf, PelBuf &recBuf, + PelBuf &adBuf); + int (*m_sgpmSadTM)(const PredictionUnit &pu, const int width, const int height, const int templateWidth, + const int templateHeight, const ComponentID compIdx, const uint8_t splitDir, PelBuf &adBuf); + static int xSgpmSadTM(const PredictionUnit &pu, const int width, const int height, const int templateWidth, + const int templateHeight, const ComponentID compIdx, const uint8_t splitDir, + PelBuf &adBuf); #endif void initInterpolationFilter( bool enable ); diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp index 9d9afd03cb7b7cb474b34995f51ee7d2b69374b0..18762a82377b4916760abd66ce7f011b9656ede7 100644 --- a/source/Lib/CommonLib/IntraPrediction.cpp +++ b/source/Lib/CommonLib/IntraPrediction.cpp @@ -148,6 +148,14 @@ void IntraPrediction::destroy() #if JVET_W0123_TIMD_FUSION delete m_timdSatdCost; #endif +#if JVET_AB0155_SGPM + for (auto &buffer: m_sgpmBuffer) + { + buffer.destroy(); + } + m_sgpmBuffer.clear(); +#endif + delete[] m_piTemp; m_piTemp = nullptr; delete[] m_pMdlmTemp; @@ -202,6 +210,10 @@ void IntraPrediction::destroy() void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepthY) { +#if JVET_AB0155_SGPM + 
m_if.initInterpolationFilter(true); +#endif + #if MERGE_ENC_OPT if (m_currChromaFormat != chromaFormatIDC) { @@ -246,6 +258,21 @@ void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepth m_timdSatdCost = new RdCost; } #endif +#if JVET_AB0155_SGPM + for (auto &buffer: m_sgpmBuffer) + { + buffer.destroy(); + } + + // the number of total temporal buffers can be adjusted by changing the number here + m_sgpmBuffer.resize(1); + + for (auto &buffer: m_sgpmBuffer) + { + buffer.create(CHROMA_400, Area(0, 0, MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE, MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE)); + } +#endif + if (m_piTemp == nullptr) { m_piTemp = new Pel[(MAX_CU_SIZE + 1) * (MAX_CU_SIZE + 1)]; @@ -807,7 +834,11 @@ int IntraPrediction::getModifiedWideAngle( int width, int height, int predMode ) } #if JVET_W0123_TIMD_FUSION +#if JVET_AB0155_SGPM +int IntraPrediction::getWideAngleExt(int width, int height, int predMode, bool bSgpm) +#else int IntraPrediction::getWideAngleExt( int width, int height, int predMode ) +#endif { if ( predMode > DC_IDX && predMode <= EXT_VDIA_IDX ) { @@ -815,11 +846,33 @@ int IntraPrediction::getWideAngleExt( int width, int height, int predMode ) int deltaSize = abs(floorLog2(width) - floorLog2(height)); if (width > height && predMode < 2 + modeShift[deltaSize]) { +#if JVET_AB0155_SGPM + if (bSgpm) + { + predMode += EXT_VDIA_IDX; + } + else + { + predMode += (EXT_VDIA_IDX - 1); + } +#else predMode += (EXT_VDIA_IDX - 1); +#endif } else if (height > width && predMode > EXT_VDIA_IDX - modeShift[deltaSize]) { +#if JVET_AB0155_SGPM + if (bSgpm) + { + predMode -= EXT_VDIA_IDX; + } + else + { + predMode -= (EXT_VDIA_IDX - 1); + } +#else predMode -= (EXT_VDIA_IDX - 1); +#endif } } return predMode; @@ -1101,6 +1154,46 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co } #endif +#if JVET_AB0155_SGPM + + if(PU::isSgpm(pu, channelType)) + { + int width = piPred.width; + int height = piPred.height; + const UnitArea 
localUnitArea(pu.chromaFormat, Area(0, 0, width, height)); + PelBuf predFusion = m_tempBuffer[1].getBuf(localUnitArea.Y()); + IntraPredParam m_ipaParam2 = m_ipaParam; + CompArea compArea = (compID == COMPONENT_Y) ? pu.Y() + : (compID == COMPONENT_Cb) ? pu.Cb() : pu.Cr(); + initIntraPatternChType(*pu.cu, compArea, false, 1); + const uint32_t uiDirMode2 = PU::getFinalIntraMode(pu, channelType, 1); + const CPelBuf &srcBuf2 = CPelBuf(getPredictorPtr(compID), srcStride, srcHStride); + switch (uiDirMode2) + { + case (PLANAR_IDX): xPredIntraPlanar(srcBuf2, predFusion); break; + case (DC_IDX): xPredIntraDc(srcBuf2, predFusion, channelType, false); break; + default: xPredIntraAng(srcBuf2, predFusion, channelType, clpRng, bExtIntraDir); break; + } + + #if JVET_X0148_TIMD_PDPC +#if CIIP_PDPC + if ((m_ipaParam.applyPDPC || pu.ciipPDPC) && (uiDirMode2 == PLANAR_IDX || uiDirMode2 == DC_IDX)) +#else + if (m_ipaParam.applyPDPC && (uiDirMode2 == PLANAR_IDX || uiDirMode2 == DC_IDX)) +#endif + { + xIntraPredPlanarDcPdpc(srcBuf2, m_tempBuffer[1].getBuf(localUnitArea.Y()).buf, + m_tempBuffer[1].getBuf(localUnitArea.Y()).stride, iWidth, iHeight, pu.ciipPDPC); + } +#endif + + m_ipaParam = m_ipaParam2; + + int splitDir = pu.cu->sgpmSplitDir; + m_if.m_weightedSgpm(pu, width, height, compID, splitDir, piPred, piPred, predFusion); + } +#endif + #if !JVET_X0148_TIMD_PDPC #if CIIP_PDPC if (m_ipaParam.applyPDPC || pu.ciipPDPC) @@ -1370,7 +1463,11 @@ void IntraPrediction::xPredIntraDc( const CPelBuf &pSrc, PelBuf &pDst, const Cha } // Function for initialization of intra prediction parameters +#if JVET_AB0155_SGPM +void IntraPrediction::initPredIntraParams(const PredictionUnit &pu, const CompArea area, const SPS &sps, const int partIdx) +#else void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompArea area, const SPS& sps) +#endif { const ComponentID compId = area.compID; const ChannelType chType = toChannelType(compId); @@ -1383,7 +1480,11 @@ void 
IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompA const Size cuSize = Size( pu.cu->blocks[compId].width, pu.cu->blocks[compId].height ); const Size puSize = Size( area.width, area.height ); const Size& blockSize = useISP ? cuSize : puSize; +#if JVET_AB0155_SGPM + const int dirMode = PU::getFinalIntraMode(pu, chType, partIdx); +#else const int dirMode = PU::getFinalIntraMode(pu, chType); +#endif #if JVET_W0123_TIMD_FUSION const int predMode = bExtIntraDir ? getWideAngleExt( blockSize.width, blockSize.height, dirMode ) : getModifiedWideAngle( blockSize.width, blockSize.height, dirMode ); #else @@ -2302,7 +2403,12 @@ inline int isLeftAvailable ( const CodingUnit &cu, const ChannelType &chT inline int isAboveRightAvailable ( const CodingUnit &cu, const ChannelType &chType, const Position &posRT, const uint32_t uiNumUnitsInPU, const uint32_t unitHeight, bool *validFlags ); inline int isBelowLeftAvailable ( const CodingUnit &cu, const ChannelType &chType, const Position &posLB, const uint32_t uiNumUnitsInPU, const uint32_t unitHeight, bool *validFlags ); +#if JVET_AB0155_SGPM +void IntraPrediction::initIntraPatternChType(const CodingUnit &cu, const CompArea &area, const bool forceRefFilterFlag, + const int partIdx) +#else void IntraPrediction::initIntraPatternChType(const CodingUnit &cu, const CompArea &area, const bool forceRefFilterFlag) +#endif { #if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS CHECK(area.width == 2, "Width of 2 is not supported"); @@ -2311,16 +2417,27 @@ void IntraPrediction::initIntraPatternChType(const CodingUnit &cu, const CompAre if (!forceRefFilterFlag) { +#if JVET_AB0155_SGPM + initPredIntraParams(*cu.firstPU, area, *cs.sps, partIdx); +#else initPredIntraParams(*cu.firstPU, area, *cs.sps); +#endif } Pel *refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED]; Pel *refBufFiltered = m_refBuffer[area.compID][PRED_BUF_FILTERED]; setReferenceArrayLengths( area ); - +#if JVET_AB0155_SGPM + if (!partIdx) + { + // ----- 
Step 1: unfiltered reference samples ----- + xFillReferenceSamples(cs.picture->getRecoBuf(area), refBufUnfiltered, area, cu); + } +#else // ----- Step 1: unfiltered reference samples ----- xFillReferenceSamples( cs.picture->getRecoBuf( area ), refBufUnfiltered, area, cu ); +#endif // ----- Step 2: filtered reference samples ----- if( m_ipaParam.refFilterFlag || forceRefFilterFlag ) { @@ -3202,12 +3319,19 @@ void IntraPrediction::xPredTimdIntraDc( const PredictionUnit &pu, const CPelBuf } } +#if JVET_AB0155_SGPM +void IntraPrediction::initPredTimdIntraParams(const PredictionUnit &pu, const CompArea area, int dirMode, bool bSgpm) +#else void IntraPrediction::initPredTimdIntraParams(const PredictionUnit & pu, const CompArea area, int dirMode) +#endif { const Size puSize = Size( area.width, area.height ); const Size& blockSize = puSize; +#if JVET_AB0155_SGPM + const int predMode = getWideAngleExt(blockSize.width, blockSize.height, dirMode, bSgpm); +#else const int predMode = getWideAngleExt( blockSize.width, blockSize.height, dirMode ); - +#endif m_ipaParam.isModeVer = predMode >= EXT_DIA_IDX; m_ipaParam.refFilterFlag = false; m_ipaParam.interpolationFlag = false; @@ -3838,6 +3962,662 @@ void IntraPrediction::xFillTimdReferenceSamples(const CPelBuf &recoBuf, Pel* ref } } +#if JVET_AB0155_SGPM +void IntraPrediction::deriveSgpmModeOrdered(const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu, + static_vector<SgpmInfo, SGPM_NUM> &candModeList, + static_vector<double, SGPM_NUM> & candCostList) +{ + SizeType uiWidth = cu.lwidth(); + SizeType uiHeight = cu.lheight(); + + int iCurX = cu.lx(); + int iCurY = cu.ly(); + int iRefX = -1, iRefY = -1; + uint32_t uiRefWidth = 0, uiRefHeight = 0; + + const int iTempWidth = SGPM_TEMPLATE_SIZE, iTempHeight = SGPM_TEMPLATE_SIZE; + + TEMPLATE_TYPE eTempType = CU::deriveTimdRefType(iCurX, iCurY, uiWidth, uiHeight, iTempWidth, iTempHeight, iRefX, + iRefY, uiRefWidth, uiRefHeight); + auto & pu = *cu.firstPU; + uint32_t uiRealW = 
uiRefWidth + (eTempType == LEFT_NEIGHBOR ? iTempWidth : 0); + uint32_t uiRealH = uiRefHeight + (eTempType == ABOVE_NEIGHBOR ? iTempHeight : 0); + + const UnitArea localUnitArea(pu.chromaFormat, Area(0, 0, uiRealW, uiRealH)); + uint32_t uiPredStride = m_sgpmBuffer[0].getBuf(localUnitArea.Y()).stride; + CHECK(eTempType != LEFT_ABOVE_NEIGHBOR, "left and above both should exist"); + + const CodingStructure &cs = *cu.cs; + m_ipaParam.multiRefIndex = iTempWidth; + Pel *piOrg = cs.picture->getRecoBuf(area).buf; + int iOrgStride = cs.picture->getRecoBuf(area).stride; + piOrg += (iRefY - iCurY) * iOrgStride + (iRefX - iCurX); + + initTimdIntraPatternLuma(cu, area, eTempType != ABOVE_NEIGHBOR ? iTempWidth : 0, + eTempType != LEFT_NEIGHBOR ? iTempHeight : 0, uiRefWidth, uiRefHeight); + + Distortion sadWholeTM[NUM_LUMA_MODE]; + Distortion sadPartsTM[NUM_LUMA_MODE][GEO_NUM_PARTITION_MODE]; + uint8_t ipmList[GEO_NUM_PARTITION_MODE][2][SGPM_NUM_MPM]; + bool sadPartsNeeded[NUM_LUMA_MODE][GEO_NUM_PARTITION_MODE] = {}; + bool ipmNeeded[NUM_LUMA_MODE] = {}; + + for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++) + { + if (!g_sgpm_splitDir[splitDir]) + { + continue; + } + + int16_t angle = g_GeoParams[splitDir][0]; + for (int partIdx = 0; partIdx < 2; partIdx++) + { + PU::getSgpmIntraMPMs(pu, ipmList[splitDir][partIdx], splitDir, g_geoTmShape[partIdx][angle]); + for (int modeIdx = 0; modeIdx < SGPM_NUM_MPM; modeIdx++) + { + int ipmIdx = ipmList[splitDir][partIdx][modeIdx]; + ipmNeeded[ipmIdx] = true; + sadPartsNeeded[ipmIdx][splitDir] = true; + } + } + } + + for (int ipmIdx = 0; ipmIdx < NUM_LUMA_MODE; ipmIdx++) + { + if (ipmNeeded[ipmIdx]) + { + int iMode = MAP67TO131(ipmIdx); + initPredTimdIntraParams(pu, area, iMode, true); + Pel *tempPred = m_sgpmBuffer[0].getBuf(localUnitArea.Y()).buf; + predTimdIntraAng(COMPONENT_Y, pu, iMode, tempPred, uiPredStride, uiRealW, uiRealH, eTempType, + (eTempType == ABOVE_NEIGHBOR) ? 0 : iTempWidth, (eTempType == LEFT_NEIGHBOR) ? 
0 : iTempHeight); + + PelBuf predBuf = m_sgpmBuffer[0].getBuf(localUnitArea.Y()); + PelBuf recBuf = cs.picture->getRecoBuf(area); + PelBuf adBuf = m_sgpmBuffer[0].getBuf(localUnitArea.Y()); + + sadWholeTM[ipmIdx] = + m_if.m_sadTM(pu, uiWidth, uiHeight, iTempWidth, iTempHeight, COMPONENT_Y, predBuf, recBuf, adBuf); + + for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++) + { + if (sadPartsNeeded[ipmIdx][splitDir]) + { + sadPartsTM[ipmIdx][splitDir] = + m_if.m_sgpmSadTM(pu, uiWidth, uiHeight, iTempWidth, iTempHeight, COMPONENT_Y, splitDir, adBuf); + } + } + } + } + // check every possible combination + uint32_t cntComb = 0; + for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++) + { + if (!g_sgpm_splitDir[splitDir]) + { + continue; + } + + for (int mode0Idx = 0; mode0Idx < SGPM_NUM_MPM; mode0Idx++) + { + for (int mode1Idx = 0; mode1Idx < SGPM_NUM_MPM; mode1Idx++) + { + int ipm0Idx = ipmList[splitDir][0][mode0Idx]; + int ipm1Idx = ipmList[splitDir][1][mode1Idx]; + if (ipm0Idx == ipm1Idx) + { + continue; + } + + double cost = static_cast<double>(sadPartsTM[ipm0Idx][splitDir]) + static_cast<double>(sadWholeTM[ipm1Idx]) + - static_cast<double>(sadPartsTM[ipm1Idx][splitDir]); + + cntComb++; + + if ((cntComb > SGPM_NUM && cost < candCostList[SGPM_NUM - 1]) || cntComb <= SGPM_NUM) + { + updateCandList(SgpmInfo(splitDir, ipm0Idx, ipm1Idx), cost, candModeList, candCostList, SGPM_NUM); + } + } + } + } +} +#endif + +#if JVET_AB0155_SGPM /* deriveTimdMode (SGPM build): template-based intra mode derivation for the luma block of cu. Each candidate mode is predicted onto the template rows above and columns left of the block and scored against the reconstructed picture (cs.picture->getRecoBuf(area)) with the distortion functions configured through setTimdDistParam. bFull: also store cu.timdMode, cu.timdIsBlended, cu.timdModeSecondary and cu.timdFusionWeight. bHorVer: also store cu.timdHor and cu.timdVer. Returns iBestMode, which is only updated on the bFull paths and is otherwise PLANAR_IDX; PLANAR_IDX is also returned when deriveTimdRefType reports NO_NEIGHBOR. The recoBuf parameter is not referenced in this body. */ +int IntraPrediction::deriveTimdMode(const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu, bool bFull, bool bHorVer) +{ + int channelBitDepth = cu.slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA); + SizeType uiWidth = cu.lwidth(); + SizeType uiHeight = cu.lheight(); + + /* function-local static scratch buffer for the template prediction: one instance shared by every call, zeroed at the start of each call */ static Pel PredLuma[(MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE) * (MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE)]; + memset(PredLuma, 0, (MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE) * (MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE) * sizeof(Pel)); + Pel * piPred = PredLuma; +
uint32_t uiPredStride = MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE; + + int iCurX = cu.lx(); + int iCurY = cu.ly(); + int iRefX = -1, iRefY = -1; + uint32_t uiRefWidth = 0, uiRefHeight = 0; + + /* template thickness: 4 samples per side, reduced to 2 on a side whose block dimension is 8 or less */ int iTempWidth = 4, iTempHeight = 4; + if (uiWidth <= 8) + { + iTempWidth = 2; + } + if (uiHeight <= 8) + { + iTempHeight = 2; + } + + TEMPLATE_TYPE eTempType = CU::deriveTimdRefType(iCurX, iCurY, uiWidth, uiHeight, iTempWidth, iTempHeight, iRefX, + iRefY, uiRefWidth, uiRefHeight); + + if (eTempType != NO_NEIGHBOR) + { + const CodingStructure &cs = *cu.cs; + m_ipaParam.multiRefIndex = iTempWidth; + Pel *piOrg = cs.picture->getRecoBuf(area).buf; + int iOrgStride = cs.picture->getRecoBuf(area).stride; + piOrg += (iRefY - iCurY) * iOrgStride + (iRefX - iCurX); + DistParam distParamSad[2]; // above, left + distParamSad[0].applyWeight = false; + distParamSad[0].useMR = false; + distParamSad[1].applyWeight = false; + distParamSad[1].useMR = false; + if (eTempType == LEFT_ABOVE_NEIGHBOR) + { + m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg + iTempWidth, piPred + iTempWidth, iOrgStride, + uiPredStride, channelBitDepth, COMPONENT_Y, uiWidth, iTempHeight, 0, 1, + true); // Use HAD (SATD) cost + m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg + iTempHeight * iOrgStride, + piPred + iTempHeight * uiPredStride, iOrgStride, uiPredStride, channelBitDepth, + COMPONENT_Y, iTempWidth, uiHeight, 0, 1, true); // Use HAD (SATD) cost + } + else if (eTempType == LEFT_NEIGHBOR) + { + m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth, + COMPONENT_Y, iTempWidth, uiHeight, 0, 1, true); + } + else if (eTempType == ABOVE_NEIGHBOR) + { + m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth, + COMPONENT_Y, uiWidth, iTempHeight, 0, 1, true); + } + initTimdIntraPatternLuma(cu, area, eTempType != ABOVE_NEIGHBOR ? iTempWidth : 0, + eTempType != LEFT_NEIGHBOR ?
iTempHeight : 0, uiRefWidth, uiRefHeight); + + /* collect the distinct luma modes of up to five intra-coded neighbours (left, above, below-left, above-right, above-left); modes of neighbours not coded with timd are converted with MAP67TO131, and the above neighbour is only used when it lies in the same CTU */ uint32_t uiIntraDirNeighbor[5] = { 0 }, modeIdx = 0; + bool includedMode[EXT_VDIA_IDX + 1]; + memset(includedMode, false, (EXT_VDIA_IDX + 1) * sizeof(bool)); + auto & pu = *cu.firstPU; + uint32_t uiRealW = uiRefWidth + (eTempType == LEFT_NEIGHBOR ? iTempWidth : 0); + uint32_t uiRealH = uiRefHeight + (eTempType == ABOVE_NEIGHBOR ? iTempHeight : 0); + /* early-termination threshold: the number of samples in a left template of iTempWidth columns plus an above template of iTempHeight rows; the search loops stop once a cost is at or below it */ uint64_t maxCost = (uint64_t)(iTempWidth * cu.lheight() + iTempHeight * cu.lwidth()); + + uint64_t uiBestCost = MAX_UINT64; + int iBestMode = PLANAR_IDX; + uint64_t uiSecondaryCost = MAX_UINT64; + int iSecondaryMode = PLANAR_IDX; + + uint64_t uiBestCostHor = MAX_UINT64; + uint64_t uiBestCostVer = MAX_UINT64; + int iBestModeHor = PLANAR_IDX; + int iBestModeVer = PLANAR_IDX; + + const Position posLTx = pu.Y().topLeft(); + const Position posRTx = pu.Y().topRight(); + const Position posLBx = pu.Y().bottomLeft(); + + // left + const PredictionUnit *puLeftx = pu.cs->getPURestricted(posLBx.offset(-1, 0), pu, pu.chType); + if (puLeftx && CU::isIntra(*puLeftx->cu)) + { + uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma(*puLeftx); + if (!puLeftx->cu->timd) + { + uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]); + } + if (!includedMode[uiIntraDirNeighbor[modeIdx]]) + { + includedMode[uiIntraDirNeighbor[modeIdx]] = true; + modeIdx++; + } + } + // above + const PredictionUnit *puAbovex = pu.cs->getPURestricted(posRTx.offset(0, -1), pu, pu.chType); + if (puAbovex && CU::isIntra(*puAbovex->cu) && CU::isSameCtu(*pu.cu, *puAbovex->cu)) + { + uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma(*puAbovex); + if (!puAbovex->cu->timd) + { + uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]); + } + if (!includedMode[uiIntraDirNeighbor[modeIdx]]) + { + includedMode[uiIntraDirNeighbor[modeIdx]] = true; + modeIdx++; + } + } + // below left + const PredictionUnit *puLeftBottomx = cs.getPURestricted(posLBx.offset(-1, 1), pu, pu.chType); + if (puLeftBottomx &&
CU::isIntra(*puLeftBottomx->cu)) + { + uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma(*puLeftBottomx); + if (!puLeftBottomx->cu->timd) + { + uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]); + } + if (!includedMode[uiIntraDirNeighbor[modeIdx]]) + { + includedMode[uiIntraDirNeighbor[modeIdx]] = true; + modeIdx++; + } + } + // above right + const PredictionUnit *puAboveRightx = cs.getPURestricted(posRTx.offset(1, -1), pu, pu.chType); + if (puAboveRightx && CU::isIntra(*puAboveRightx->cu)) + { + uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma(*puAboveRightx); + if (!puAboveRightx->cu->timd) + { + uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]); + } + if (!includedMode[uiIntraDirNeighbor[modeIdx]]) + { + includedMode[uiIntraDirNeighbor[modeIdx]] = true; + modeIdx++; + } + } + // above left + const PredictionUnit *puAboveLeftx = cs.getPURestricted(posLTx.offset(-1, -1), pu, pu.chType); + if (puAboveLeftx && CU::isIntra(*puAboveLeftx->cu)) + { + uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma(*puAboveLeftx); + if (!puAboveLeftx->cu->timd) + { + uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]); + } + if (!includedMode[uiIntraDirNeighbor[modeIdx]]) + { + includedMode[uiIntraDirNeighbor[modeIdx]] = true; + modeIdx++; + } + } + /* bNoAngular is set only when at least two distinct neighbour modes were collected and none of them is greater than DC_IDX; in that case only modes 0 and 1 are tested below and the MPM-based search is skipped */ bool bNoAngular = false; + if (modeIdx >= 2) + { + bNoAngular = true; + for (uint32_t i = 0; i < modeIdx; i++) + { + if (uiIntraDirNeighbor[i] > DC_IDX) + { + bNoAngular = false; + break; + } + } + } + + if (bNoAngular) + { + if (bFull) + { + for (int iMode = 0; iMode <= 1; iMode++) + { + uint64_t uiCost = 0; + initPredTimdIntraParams(pu, area, iMode); + predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType, + (eTempType == ABOVE_NEIGHBOR) ? 0 : iTempWidth, + (eTempType == LEFT_NEIGHBOR) ?
0 : iTempHeight); + if (eTempType == LEFT_ABOVE_NEIGHBOR) + { + uiCost += distParamSad[0].distFunc(distParamSad[0]); + uiCost += distParamSad[1].distFunc(distParamSad[1]); + } + else if (eTempType == LEFT_NEIGHBOR) + { + uiCost = distParamSad[1].distFunc(distParamSad[1]); + } + else if (eTempType == ABOVE_NEIGHBOR) + { + uiCost += distParamSad[0].distFunc(distParamSad[0]); + } + else + { + assert(0); + } + + if (uiCost < uiBestCost) + { + uiBestCost = uiCost; + iBestMode = iMode; + } + if (uiBestCost <= maxCost) + { + break; + } + } + cu.timdMode = iBestMode; + cu.timdIsBlended = false; + } + if (bHorVer) + { + cu.timdHor = PLANAR_IDX; + cu.timdVer = PLANAR_IDX; + } + return iBestMode; + } +#if SECONDARY_MPM + uint8_t mpmList[NUM_MOST_PROBABLE_MODES]; + uint8_t intraNonMPM[NUM_NON_MPM_MODES]; + PU::getIntraMPMs(pu, mpmList, intraNonMPM); +#else + unsigned mpmList[NUM_MOST_PROBABLE_MODES]; + PU::getIntraMPMs(pu, mpmList); +#endif + /* candidate list: the MPM list followed by whichever of DC_IDX, HOR_IDX, VER_IDX it does not already contain */ unsigned mpmExtraList[NUM_MOST_PROBABLE_MODES + 3]; // +DC/VER/HOR + int maxModeNum = NUM_MOST_PROBABLE_MODES; + unsigned modeCandList[3] = { DC_IDX, HOR_IDX, VER_IDX }; + bool bNotExist[3] = { true, true, true }; + for (int i = 0; i < NUM_MOST_PROBABLE_MODES; i++) + { + mpmExtraList[i] = mpmList[i]; + if (bNotExist[0] && mpmList[i] == DC_IDX) + { + bNotExist[0] = false; + } + if (bNotExist[1] && mpmList[i] == HOR_IDX) + { + bNotExist[1] = false; + } + if (bNotExist[2] && mpmList[i] == VER_IDX) + { + bNotExist[2] = false; + } + } + for (int i = 0; i < 3; i++) + { + if (bNotExist[i]) + { + mpmExtraList[maxModeNum++] = modeCandList[i]; + } + } + /* updateFull is cleared once the secondary cost reaches maxCost; from then on only the timdHor and timdVer bookkeeping continues */ bool updateFull = true; + for (int i = 0; i < maxModeNum; i++) + { + uint64_t uiCost = 0; + int iMode = mpmExtraList[i]; + /* -1 converts to the largest uint64_t value and serves as the not-yet-costed marker; because the costs are later added with the compound add-assign operator, the stored value wraps to cost minus 1, and adding 0 leaves it at the maximum */ uint64_t uiCostVer = -1; + uint64_t uiCostHor = -1; + uint64_t tmpCost0 = 0; + uint64_t tmpCost1 = 0; + if (iMode > DC_IDX) + { + iMode = MAP67TO131(iMode); + } + else + { + if (!bFull && bHorVer) + { + continue; + } + } + initPredTimdIntraParams(pu, area, iMode); +
predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType, + (eTempType == ABOVE_NEIGHBOR) ? 0 : iTempWidth, (eTempType == LEFT_NEIGHBOR) ? 0 : iTempHeight); + if (eTempType == LEFT_ABOVE_NEIGHBOR) + { + if (bFull && updateFull) + { + tmpCost0 = distParamSad[0].distFunc(distParamSad[0]); + tmpCost1 = distParamSad[1].distFunc(distParamSad[1]); + } + else + { + if (iMode > EXT_DIA_IDX) + { + tmpCost0 = distParamSad[0].distFunc(distParamSad[0]); + } + else + { + tmpCost1 = distParamSad[1].distFunc(distParamSad[1]); + } + } + } + else if (eTempType == LEFT_NEIGHBOR) + { + tmpCost0 = distParamSad[1].distFunc(distParamSad[1]); + } + else if (eTempType == ABOVE_NEIGHBOR) + { + tmpCost1 = distParamSad[0].distFunc(distParamSad[0]); + } + else + { + assert(0); + } + + if (bFull && updateFull) + { + uiCost = tmpCost0 + tmpCost1; + if (uiCost < uiBestCost) + { + uiSecondaryCost = uiBestCost; + iSecondaryMode = iBestMode; + uiBestCost = uiCost; + iBestMode = iMode; + } + else if (uiCost < uiSecondaryCost) + { + uiSecondaryCost = uiCost; + iSecondaryMode = iMode; + } + if (uiSecondaryCost <= maxCost) + { + updateFull = false; + if (!bHorVer) + { + break; + } + } + } + if (bHorVer && iMode > DC_IDX) + { + /* NOTE(review): with LEFT_ABOVE_NEIGHBOR, modes above EXT_DIA_IDX are scored on the above template (tmpCost0) and the others on the left template (tmpCost1). In the single-neighbour cases the distortion was stored in the other variable above (LEFT_NEIGHBOR fills tmpCost0, ABOVE_NEIGHBOR fills tmpCost1), so the value added here is 0, the cost stays at its initial maximum and iBestModeHor or iBestModeVer is never updated. Confirm whether that is intended before changing it, since the result feeds cu.timdHor and cu.timdVer. */ if (eTempType == LEFT_ABOVE_NEIGHBOR) + { + if (iMode > EXT_DIA_IDX) + { + uiCostVer += tmpCost0; + } + else + { + uiCostHor += tmpCost1; + } + } + else if (eTempType == LEFT_NEIGHBOR) + { + uiCostHor += tmpCost1; + } + else if (eTempType == ABOVE_NEIGHBOR) + { + uiCostVer += tmpCost0; + } + if (uiCostHor < uiBestCostHor) + { + uiBestCostHor = uiCostHor; + iBestModeHor = iMode; + } + if (uiCostVer < uiBestCostVer) + { + uiBestCostVer = uiCostVer; + iBestModeVer = iMode; + } + } + + } + + if(bFull) + { + /* refinement: when the best mode is above DC_IDX and its cost is still above maxCost, also test the two adjacent mode indices (minus 1 and plus 1) */ int midMode = iBestMode; + if (midMode > DC_IDX && uiBestCost > maxCost) + { + for (int i = -1; i <= 1; i += 2) + { + int iMode = midMode + i; + if (iMode <= DC_IDX || iMode > EXT_VDIA_IDX) + { + continue; + } + initPredTimdIntraParams(pu, area,
iMode); + predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType, + (eTempType == ABOVE_NEIGHBOR) ? 0 : iTempWidth, + (eTempType == LEFT_NEIGHBOR) ? 0 : iTempHeight); + uint64_t uiCost = 0; + if (eTempType == LEFT_ABOVE_NEIGHBOR) + { + uiCost += distParamSad[0].distFunc(distParamSad[0]); + uiCost += distParamSad[1].distFunc(distParamSad[1]); + } + else if (eTempType == LEFT_NEIGHBOR) + { + uiCost = distParamSad[1].distFunc(distParamSad[1]); + } + else if (eTempType == ABOVE_NEIGHBOR) + { + uiCost += distParamSad[0].distFunc(distParamSad[0]); + } + else + { + assert(0); + } + + if (uiCost < uiBestCost) + { + uiBestCost = uiCost; + iBestMode = iMode; + } + if (uiBestCost <= maxCost) + { + break; + } + } + } + + /* the same two-neighbour refinement, applied to the secondary mode */ midMode = iSecondaryMode; + if (midMode > DC_IDX && uiSecondaryCost > maxCost) + { + for (int i = -1; i <= 1; i += 2) + { + int iMode = midMode + i; + if (iMode <= DC_IDX || iMode > EXT_VDIA_IDX) + { + continue; + } + initPredTimdIntraParams(pu, area, iMode); + predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType, + (eTempType == ABOVE_NEIGHBOR) ? 0 : iTempWidth, + (eTempType == LEFT_NEIGHBOR) ?
0 : iTempHeight); + uint64_t uiCost = 0; + if (eTempType == LEFT_ABOVE_NEIGHBOR) + { + uiCost += distParamSad[0].distFunc(distParamSad[0]); + uiCost += distParamSad[1].distFunc(distParamSad[1]); + } + else if (eTempType == LEFT_NEIGHBOR) + { + uiCost = distParamSad[1].distFunc(distParamSad[1]); + } + else if (eTempType == ABOVE_NEIGHBOR) + { + uiCost += distParamSad[0].distFunc(distParamSad[0]); + } + else + { + assert(0); + } + + if (uiCost < uiSecondaryCost) + { + uiSecondaryCost = uiCost; + iSecondaryMode = iMode; + } + if (uiSecondaryCost <= maxCost) + { + break; + } + } + } + + // if( uiSecondaryCost < 2 * uiBestCost ), 2 * uiBestCost can overflow uint64_t + if (uiSecondaryCost < uiBestCost || (uiSecondaryCost - uiBestCost < uiBestCost)) + { + cu.timdMode = iBestMode; + cu.timdIsBlended = true; + cu.timdModeSecondary = iSecondaryMode; + + /* the two fusion weights always add up to sum_weight, i.e. 1 shifted left by blend_sum_weight */ const int blend_sum_weight = 6; + int sum_weight = 1 << blend_sum_weight; + +#if JVET_X0149_TIMD_DIMD_LUT + int g_gradDivTable[16] = { 0, 7, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 1, 1, 0 }; + uint64_t s0 = uiSecondaryCost; + // uiBestCost + uiSecondaryCost can overflow uint64_t + uint64_t s1 = (MAX_UINT64 - uiSecondaryCost < uiBestCost) ?
MAX_UINT64 : (uiBestCost + uiSecondaryCost); + int x = floorLog2_uint64(s1); + CHECK(x < 0, "floor log2 value should be no negative"); + int norm_s1 = int(s1 << 4 >> x) & 15; + int v = g_gradDivTable[norm_s1] | 8; + x += (norm_s1 != 0); + int shift = x + 3; + int add = (1 << (shift - 1)); + int iRatio = int((s0 * v * sum_weight + add) >> shift); + + if (iRatio > sum_weight) + { + iRatio = sum_weight; + } + + CHECK(iRatio > sum_weight, "Wrong DIMD ratio"); +#else + double dRatio = 0.0; + dRatio = (double) uiSecondaryCost / (double) (uiBestCost + uiSecondaryCost); + int iRatio = static_cast<int>(dRatio * sum_weight + 0.5); +#endif + /* iRatio is built from the secondary cost relative to the sum of both costs (exact rounded quotient in the floating-point branch; the table branch presumably approximates the same quotient without a division - confirm); weight [1] is the remainder up to sum_weight */ cu.timdFusionWeight[0] = iRatio; + cu.timdFusionWeight[1] = sum_weight - iRatio; + } + else + { + cu.timdMode = iBestMode; + cu.timdIsBlended = false; + } + } + if (bHorVer) + { + cu.timdHor = iBestModeHor; + cu.timdVer = iBestModeVer; + } + + return iBestMode; + } + else + { + if (bFull) + { + cu.timdMode = PLANAR_IDX; + cu.timdIsBlended = false; + } + if (bHorVer) + { + cu.timdHor = PLANAR_IDX; + cu.timdVer = PLANAR_IDX; + } + return PLANAR_IDX; + } +} +#else // SGPM + int IntraPrediction::deriveTimdMode( const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu ) { int channelBitDepth = cu.slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA); @@ -4260,6 +5040,7 @@ int IntraPrediction::deriveTimdMode( const CPelBuf &recoBuf, const CompArea &are return PLANAR_IDX; } } +#endif #if INTRA_TRANS_ENC_OPT void IntraPrediction::timdBlending(Pel *pDst, int strideDst, Pel *pSrc, int strideSrc, int w0, int w1, int width, int height) { diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h index 65225dff96000c1ccce2ed826e91ce7cc9c8805f..2a49bfa090905e8b443d3ac86f3263dcfa1f781e 100644 --- a/source/Lib/CommonLib/IntraPrediction.h +++ b/source/Lib/CommonLib/IntraPrediction.h @@ -48,7 +48,9 @@ #endif #include "MatrixIntraPrediction.h" - +#if JVET_AB0155_SGPM +#include "CommonLib/InterpolationFilter.h" +#endif //!
\ingroup CommonLib //! \{ @@ -164,6 +166,9 @@ public: protected: Pel m_refBuffer[MAX_NUM_COMPONENT][NUM_PRED_BUF][(MAX_CU_SIZE * 2 + 1 + MAX_REF_LINE_IDX) * 2]; uint32_t m_refBufferStride[MAX_NUM_COMPONENT]; +#if JVET_AB0155_SGPM + InterpolationFilter m_if; +#endif private: @@ -248,7 +253,9 @@ protected: ScanElement* m_scanOrder; bool m_bestScanRotationMode; std::vector<PelStorage> m_tempBuffer; - +#if JVET_AB0155_SGPM + std::vector<PelStorage> m_sgpmBuffer; +#endif #if JVET_V0130_INTRA_TMP int m_uiPartLibSize; TempLibFast m_tempLibFast; @@ -267,8 +274,11 @@ protected: #else void xPredIntraAng ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const ClpRng& clpRng); #endif - +#if JVET_AB0155_SGPM + void initPredIntraParams(const PredictionUnit &pu, const CompArea compArea, const SPS &sps, const int partIdx = 0); +#else void initPredIntraParams ( const PredictionUnit & pu, const CompArea compArea, const SPS& sps ); +#endif static bool isIntegerSlope(const int absAng) { return (0 == (absAng & 0x1F)); } #if JVET_W0123_TIMD_FUSION @@ -288,8 +298,13 @@ protected: static int getModifiedWideAngle ( int width, int height, int predMode ); #if JVET_W0123_TIMD_FUSION +#if JVET_AB0155_SGPM + static int getWideAngleExt(int width, int height, int predMode, bool bSgpm = false); +#else static int getWideAngleExt ( int width, int height, int predMode ); #endif +#endif + void setReferenceArrayLengths ( const CompArea &area ); void destroy (); @@ -362,9 +377,17 @@ public: void xIntraPredTimdAngPdpc(Pel* pDsty,const int dstStride,Pel* refSide,const int width,const int height, int xOffset, int yOffset, int scale, int invAngle); void xFillTimdReferenceSamples ( const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const CodingUnit &cu, int iTemplateWidth, int iTemplateHeight ); Pel xGetPredTimdValDc ( const CPelBuf &pSrc, const Size &dstSize, TEMPLATE_TYPE eTempType, int iTempHeight, int iTempWidth ); +#if JVET_AB0155_SGPM + void 
initPredTimdIntraParams(const PredictionUnit &pu, const CompArea area, int dirMode, bool bSgpm = false); +#else void initPredTimdIntraParams (const PredictionUnit & pu, const CompArea area, int dirMode); +#endif void predTimdIntraAng ( const ComponentID compId, const PredictionUnit &pu, uint32_t uiDirMode, Pel* pPred, uint32_t uiStride, uint32_t iWidth, uint32_t iHeight, TEMPLATE_TYPE eTempType, int32_t iTemplateWidth, int32_t iTemplateHeight); +#if JVET_AB0155_SGPM + int deriveTimdMode ( const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu, bool bFull = true, bool bHorVer = false ); +#else int deriveTimdMode ( const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu ); +#endif void initTimdIntraPatternLuma (const CodingUnit &cu, const CompArea &area, int iTemplateWidth, int iTemplateHeight, uint32_t uiRefWidth, uint32_t uiRefHeight); #if GRAD_PDPC void xIntraPredTimdAngGradPdpc (Pel* pDsty, const int dstStride, Pel* refMain, Pel* refSide, const int width, const int height, int xOffset, int yOffset, int scale, int deltaPos, int intraPredAngle, const ClpRng& clpRng); @@ -377,6 +400,11 @@ public: #endif #endif #endif +#if JVET_AB0155_SGPM + void deriveSgpmModeOrdered(const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu, + static_vector<SgpmInfo, SGPM_NUM> &candModeList, + static_vector<double, SGPM_NUM> & candCostList); +#endif #if JVET_Z0056_GPM_SPLIT_MODE_REORDERING && JVET_Y0065_GPM_INTRA protected: bool m_abFilledIntraGPMRefTpl[NUM_INTRA_MODE]; @@ -411,7 +439,14 @@ public: Pel xGlmGetLumaVal (const int s[6], const int c[6], const int glmIdx, const Pel val) const; #endif /// set parameters from CU data for accessing intra data - void initIntraPatternChType (const CodingUnit &cu, const CompArea &area, const bool forceRefFilterFlag = false); // use forceRefFilterFlag to get both filtered and unfiltered buffers + +#if JVET_AB0155_SGPM + void initIntraPatternChType(const CodingUnit &cu, const CompArea &area, const bool forceRefFilterFlag = false, + 
const int partIdx = 0); // use forceRefFilterFlag to get both filtered and unfiltered buffers +#else // SGPM + void initIntraPatternChType( + const CodingUnit &cu, const CompArea &area, const bool forceRefFilterFlag = false); // use forceRefFilterFlag to get both filtered and unfiltered buffers +#endif void initIntraPatternChTypeISP (const CodingUnit& cu, const CompArea& area, PelBuf& piReco, const bool forceRefFilterFlag = false); // use forceRefFilterFlag to get both filtered and unfiltered buffers // Matrix-based intra prediction diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp index 4244a632d8e679d4a2b0425ce5e01be6615238b7..b17ede6742cedb5b73028195893deb3d1f874b10 100644 --- a/source/Lib/CommonLib/Rom.cpp +++ b/source/Lib/CommonLib/Rom.cpp @@ -5132,7 +5132,7 @@ void initGeoTemplate() modeIdx++; } } -#if JVET_AA0058_GPM_ADP_BLD +#if JVET_AA0058_GPM_ADP_BLD || JVET_AB0155_SGPM // initialization of blending weights for (int angleIdx = 0; angleIdx < (GEO_NUM_ANGLES >> 2) + 1; angleIdx++) { @@ -5140,7 +5140,11 @@ void initGeoTemplate() { continue; } +#if JVET_AB0155_SGPM + for (int bldIdx = 0; bldIdx < TOTAL_GEO_NUM_BLD; bldIdx++) +#else for (int bldIdx = 0; bldIdx < GEO_NUM_BLD; bldIdx++) +#endif { g_globalGeoWeights[bldIdx][g_angle2mask[angleIdx]] = new int16_t[GEO_WEIGHT_MASK_SIZE * GEO_WEIGHT_MASK_SIZE]; @@ -5179,7 +5183,7 @@ void initGeoTemplate() { continue; } -#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING +#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_AB0155_SGPM g_globalGeoWeightsTpl[g_angle2mask[angleIdx]] = new Pel[GEO_WEIGHT_MASK_SIZE_EXT * GEO_WEIGHT_MASK_SIZE_EXT]; #endif #if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT @@ -5193,7 +5197,7 @@ void initGeoTemplate() int16_t rho = (g_Dis[distanceX] << (GEO_MAX_CU_LOG2 + 1)) + (g_Dis[distanceY] << (GEO_MAX_CU_LOG2 + 1)); static const int16_t maskOffset = (2 * GEO_MAX_CU_SIZE - GEO_WEIGHT_MASK_SIZE) >> 1; int index = 0; -#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING +#if 
JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_AB0155_SGPM int indexGeoWeight = 0; for (int y = -GEO_TM_ADDED_WEIGHT_MASK_SIZE; y < GEO_WEIGHT_MASK_SIZE + GEO_TM_ADDED_WEIGHT_MASK_SIZE; y++) { @@ -5231,7 +5235,7 @@ void initGeoTemplate() continue; } g_globalGeoWeights[g_angle2mask[angleIdx]] = new int16_t[GEO_WEIGHT_MASK_SIZE * GEO_WEIGHT_MASK_SIZE]; -#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING +#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_AB0155_SGPM g_globalGeoWeightsTpl[g_angle2mask[angleIdx]] = new Pel[GEO_WEIGHT_MASK_SIZE_EXT * GEO_WEIGHT_MASK_SIZE_EXT]; #endif #if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT @@ -5245,7 +5249,7 @@ void initGeoTemplate() int16_t rho = (g_Dis[distanceX] << (GEO_MAX_CU_LOG2+1)) + (g_Dis[distanceY] << (GEO_MAX_CU_LOG2 + 1)); static const int16_t maskOffset = (2*GEO_MAX_CU_SIZE - GEO_WEIGHT_MASK_SIZE) >> 1; int index = 0; -#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING +#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_AB0155_SGPM int indexGeoWeight = 0; for( int y = -GEO_TM_ADDED_WEIGHT_MASK_SIZE; y < GEO_WEIGHT_MASK_SIZE + GEO_TM_ADDED_WEIGHT_MASK_SIZE; y++ ) { @@ -5309,18 +5313,50 @@ void initGeoTemplate() } } } - +#if JVET_AB0155_SGPM + for (int hIdx = 0; hIdx < GEO_NUM_CU_SIZE_EX; hIdx++) + { + int16_t height = 1 << (hIdx + GEO_MIN_CU_LOG2_EX); + for (int wIdx = 0; wIdx < GEO_NUM_CU_SIZE_EX; wIdx++) + { + int16_t width = 1 << (wIdx + GEO_MIN_CU_LOG2_EX); + for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++) + { + int16_t angle = g_GeoParams[splitDir][0]; + int16_t distance = g_GeoParams[splitDir][1]; + int16_t offsetX = (GEO_WEIGHT_MASK_SIZE - width) >> 1; + int16_t offsetY = (GEO_WEIGHT_MASK_SIZE - height) >> 1; + if (distance > 0) + { + if (angle % 16 == 8 || (angle % 16 != 0 && height >= width)) + { + offsetY += angle < 16 ? ((distance * (int32_t) height) >> 3) : -((distance * (int32_t) height) >> 3); + } + else + { + offsetX += angle < 16 ? 
((distance * (int32_t) width) >> 3) : -((distance * (int32_t) width) >> 3); + } + } + g_weightOffsetEx[splitDir][hIdx][wIdx][0] = offsetX; + g_weightOffsetEx[splitDir][hIdx][wIdx][1] = offsetY; + } + } + } +#endif } int16_t** g_GeoParams; -#if JVET_AA0058_GPM_ADP_BLD +#if JVET_AB0155_SGPM +int16_t *g_globalGeoWeights[TOTAL_GEO_NUM_BLD][GEO_NUM_PRESTORED_MASK]; +int g_bld2Width[TOTAL_GEO_NUM_BLD] = { 1, 2, 4, 8, 16, 32 }; +#elif JVET_AA0058_GPM_ADP_BLD int16_t* g_globalGeoWeights[GEO_NUM_BLD][GEO_NUM_PRESTORED_MASK]; int g_bld2Width[GEO_NUM_BLD] = { 1, 2, 4, 8, 16 }; #else int16_t* g_globalGeoWeights [GEO_NUM_PRESTORED_MASK]; #endif -#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING +#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_AB0155_SGPM Pel* g_globalGeoWeightsTpl[GEO_NUM_PRESTORED_MASK]; #endif #if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT @@ -5328,6 +5364,20 @@ Pel* g_globalGeoEncSADmask[GEO_NUM_PRESTORED_MASK]; #else int16_t* g_globalGeoEncSADmask[GEO_NUM_PRESTORED_MASK]; #endif +#if JVET_AB0155_SGPM +int16_t g_weightOffsetEx[GEO_NUM_PARTITION_MODE][GEO_NUM_CU_SIZE_EX][GEO_NUM_CU_SIZE_EX][2]; +int8_t g_sgpm_splitDir[GEO_NUM_PARTITION_MODE] = { +1,1,0,0,0,0,1,0, +1,0,1,0,1,0,1,0, +1,0,1,1,1,0,1,0, +1,0,1,0,1,0,1,0, +0,0,0,0,1,1,0,0, +0,0,1,0,0,1,0,0, +1,0,1,1,0,1,0,0, +1,0,0,1,0,0,0,0 +}; +#endif + int16_t g_weightOffset [GEO_NUM_PARTITION_MODE][GEO_NUM_CU_SIZE][GEO_NUM_CU_SIZE][2]; int8_t g_angle2mask[GEO_NUM_ANGLES] = { 0, -1, 1, 2, 3, 4, -1, -1, 5, -1, -1, 4, 3, 2, 1, -1, 0, -1, 1, 2, 3, 4, -1, -1, 5, -1, -1, 4, 3, 2, 1, -1 }; int8_t g_Dis[GEO_NUM_ANGLES] = { 8, 8, 8, 8, 4, 4, 2, 1, 0, -1, -2, -4, -4, -8, -8, -8, -8, -8, -8, -8, -4, -4, -2, -1, 0, 1, 2, 4, 4, 8, 8, 8 }; diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h index b782d80f43558205873c2ff0d03596435dfcd961..ad83eea26461b138a12b4b2fd2db7b86ee4180ed 100644 --- a/source/Lib/CommonLib/Rom.h +++ b/source/Lib/CommonLib/Rom.h @@ -342,13 +342,16 @@ const int g_IBCBufferSize = 256 * 128; void 
initGeoTemplate(); extern int16_t** g_GeoParams; -#if JVET_AA0058_GPM_ADP_BLD +#if JVET_AB0155_SGPM +extern int16_t *g_globalGeoWeights[TOTAL_GEO_NUM_BLD][GEO_NUM_PRESTORED_MASK]; +extern int g_bld2Width[TOTAL_GEO_NUM_BLD]; +#elif JVET_AA0058_GPM_ADP_BLD extern int16_t* g_globalGeoWeights [GEO_NUM_BLD][GEO_NUM_PRESTORED_MASK]; extern int g_bld2Width [GEO_NUM_BLD]; #else extern int16_t* g_globalGeoWeights [GEO_NUM_PRESTORED_MASK]; #endif -#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING +#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_AB0155_SGPM extern Pel* g_globalGeoWeightsTpl[GEO_NUM_PRESTORED_MASK]; #endif #if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT @@ -360,6 +363,11 @@ extern int16_t g_weightOffset [GEO_NUM_PARTITION_MODE][GEO_NUM_CU_SIZE][ extern int8_t g_angle2mask [GEO_NUM_ANGLES]; extern int8_t g_Dis[GEO_NUM_ANGLES]; extern int8_t g_angle2mirror[GEO_NUM_ANGLES]; + +#if JVET_AB0155_SGPM +extern int16_t g_weightOffsetEx[GEO_NUM_PARTITION_MODE][GEO_NUM_CU_SIZE_EX][GEO_NUM_CU_SIZE_EX][2]; +extern int8_t g_sgpm_splitDir[GEO_NUM_PARTITION_MODE]; +#endif #if JVET_Y0065_GPM_INTRA extern int8_t g_geoAngle2IntraAng [GEO_NUM_ANGLES]; #endif diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h index 4e498324b34cd1bb678335c3437e10a8efd7da58..b15d84665caa77ec5fc0789a7d92d2cc7c8bca2c 100644 --- a/source/Lib/CommonLib/Slice.h +++ b/source/Lib/CommonLib/Slice.h @@ -325,6 +325,9 @@ class ConstraintInfo #if JVET_W0123_TIMD_FUSION bool m_noTimdConstraintFlag; #endif +#if JVET_AB0155_SGPM + bool m_noSgpmConstraintFlag; +#endif #if ENABLE_OBMC bool m_noObmcConstraintFlag; #endif @@ -635,6 +638,10 @@ public: bool getNoTimdConstraintFlag() const { return m_noTimdConstraintFlag; } void setNoTimdConstraintFlag(bool bVal) { m_noTimdConstraintFlag = bVal; } #endif +#if JVET_AB0155_SGPM + bool getNoSgpmConstraintFlag() const { return m_noSgpmConstraintFlag; } + void setNoSgpmConstraintFlag(bool bVal) { m_noSgpmConstraintFlag = bVal; } +#endif #if ENABLE_OBMC bool 
getNoObmcConstraintFlag() const { return m_noObmcConstraintFlag; } void setNoObmcConstraintFlag(bool bVal) { m_noObmcConstraintFlag = bVal; } @@ -1717,6 +1724,9 @@ private: #if JVET_W0123_TIMD_FUSION bool m_timd; #endif +#if JVET_AB0155_SGPM + bool m_sgpm; +#endif #if JVET_V0130_INTRA_TMP bool m_intraTMP; ///< intra Template Matching unsigned m_intraTmpMaxSize; ///< max CU size for which intra TMP is allowed @@ -2258,6 +2268,11 @@ void setCCALFEnabledFlag( bool b ) void setUseTimd ( bool b ) { m_timd = b; } bool getUseTimd () const { return m_timd; } #endif +#if JVET_AB0155_SGPM + void setUseSgpm (bool b) { m_sgpm = b; } + bool getUseSgpm () const { return m_sgpm; } +#endif + #if ENABLE_OBMC void setUseOBMC ( bool b ) { m_OBMC = b; } bool getUseOBMC () const { return m_OBMC; } diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp index f8b39827d48ff6fe169d73854f7c3c7af5261caa..aa7735cfb4e9a8cc7fea2d0361a3f427ea613516 100644 --- a/source/Lib/CommonLib/TrQuant.cpp +++ b/source/Lib/CommonLib/TrQuant.cpp @@ -502,6 +502,12 @@ void TrQuant::xInvLfnst( const TransformUnit &tu, const ComponentID compID ) { intraMode = PLANAR_IDX; } +#if JVET_AB0155_SGPM + if (PU::isSgpm(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID))) + { + intraMode = g_geoAngle2IntraAng[g_GeoParams[tu.cu->sgpmSplitDir][0]]; + } +#endif #if JVET_V0130_INTRA_TMP if( PU::isTmp( *tu.cs->getPU( area.pos(), toChannelType( compID ) ), toChannelType( compID ) ) ) { @@ -734,6 +740,12 @@ void TrQuant::xFwdLfnst( const TransformUnit &tu, const ComponentID compID, cons intraMode = PLANAR_IDX; } #endif +#if JVET_AB0155_SGPM + if (PU::isSgpm(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID))) + { + intraMode = g_geoAngle2IntraAng[g_GeoParams[tu.cu->sgpmSplitDir][0]]; + } +#endif #if JVET_W0123_TIMD_FUSION if (tu.cu->timd && compID == COMPONENT_Y) { @@ -1128,6 +1140,12 @@ void TrQuant::getTrTypes(const TransformUnit tu, const ComponentID compID, 
int & { predMode = MAP131TO67(predMode); } +#endif +#if JVET_AB0155_SGPM + if (tu.cu->sgpm) + { + predMode = g_geoAngle2IntraAng[g_GeoParams[tu.cu->sgpmSplitDir][0]]; + } #endif int ucMode; int nMdIdx; @@ -2202,6 +2220,12 @@ int TrQuant::getLfnstIdx(const TransformUnit &tu, ComponentID compID) intraMode = PLANAR_IDX; } #endif +#if JVET_AB0155_SGPM + if (PU::isSgpm(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID))) + { + intraMode = g_geoAngle2IntraAng[g_GeoParams[tu.cu->sgpmSplitDir][0]]; + } +#endif #if JVET_W0123_TIMD_FUSION if (tu.cu->timd && compID == COMPONENT_Y) { diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 9f04d107643a32c94485b1207436faa2b86dd943..bfa93f969d4e21016310b87cef69476c3a7cbbca 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -146,7 +146,7 @@ #define JVET_AB0174_CCCM_DIV_FREE 1 // JVET-AB0174: CCCM with division free operation #endif #define JVET_AB0061_ITMP_BV_FOR_IBC 1 // JVET-AB0061: Storing IntraTMP BV for IBC BV prediction - +#define JVET_AB0155_SGPM 1 // JVET-AB0155: spatial geometric partitioning mode //IBC #define JVET_Y0058_IBC_LIST_MODIFY 1 // JVET-Y0058: Modifications of IBC merge/AMVP list construction, ARMC-TM-IBC part is included under JVET_W0090_ARMC_TM @@ -1651,7 +1651,24 @@ enum RESHAPE_SIGNAL_TYPE RESHAPE_SIGNAL_NULL = 100, }; +#if JVET_AB0155_SGPM /* SgpmInfo: one SGPM (spatial geometric partitioning mode) candidate, made of a split direction index and two intra mode values; presumably one mode per partition of the split - confirm against the code that builds the candidate list */ +struct SgpmInfo +{ + int sgpmSplitDir; + int sgpmMode0; + int sgpmMode1; + /* default candidate: all three fields are zero */ SgpmInfo() : sgpmSplitDir(0), sgpmMode0(0), sgpmMode1(0) {} + /* sd: split direction, sm0 and sm1: the first and second mode value */ SgpmInfo(const int sd, const int sm0, const int sm1) : sgpmSplitDir(sd), sgpmMode0(sm0), sgpmMode1(sm1) {} + /* user-declared copy assignment; copies the three fields member by member */ SgpmInfo &operator=(const SgpmInfo &other) + { + sgpmSplitDir = other.sgpmSplitDir; + sgpmMode0 = other.sgpmMode0; + sgpmMode1 = other.sgpmMode1; + return *this; + } +}; +#endif // --------------------------------------------------------------------------- // exception class //
--------------------------------------------------------------------------- diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp index 86590433b8627bcf5e142ea15dc77d8669764ddf..77695d1ba500ac28a8ff0faf1e16c80d62c1db70 100644 --- a/source/Lib/CommonLib/Unit.cpp +++ b/source/Lib/CommonLib/Unit.cpp @@ -301,6 +301,15 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other ) timdFusionWeight[0] = other.timdFusionWeight[0]; timdFusionWeight[1] = other.timdFusionWeight[1]; #endif +#if JVET_AB0155_SGPM + timdHor = other.timdHor; + timdVer = other.timdVer; + sgpm = other.sgpm; + sgpmIdx = other.sgpmIdx; + sgpmSplitDir = other.sgpmSplitDir; + sgpmMode0 = other.sgpmMode0; + sgpmMode1 = other.sgpmMode1; +#endif #if ENABLE_OBMC obmcFlag = other.obmcFlag; isobmcMC = other.isobmcMC; @@ -400,6 +409,15 @@ void CodingUnit::initData() timdFusionWeight[0] = -1; timdFusionWeight[1] = -1; #endif +#if JVET_AB0155_SGPM + timdHor = -1; + timdVer = -1; + sgpm = false; + sgpmIdx = -1; + sgpmSplitDir = -1; + sgpmMode0 = -1; + sgpmMode1 = -1; +#endif #if ENABLE_OBMC obmcFlag = true; isobmcMC = false; @@ -639,6 +657,10 @@ void PredictionUnit::initData() intraDir[0] = DC_IDX; intraDir[1] = PLANAR_IDX; +#if JVET_AB0155_SGPM + intraDir1[0] = DC_IDX; + intraDir1[1] = PLANAR_IDX; +#endif #if JVET_Z0050_DIMD_CHROMA_FUSION isChromaFusion = false; #endif @@ -769,6 +791,9 @@ PredictionUnit& PredictionUnit::operator=(const IntraPredictionData& predData) for (uint32_t i = 0; i < MAX_NUM_CHANNEL_TYPE; i++) { intraDir[i] = predData.intraDir[i]; +#if JVET_AB0155_SGPM + intraDir1[i] = predData.intraDir1[i]; +#endif } #if JVET_Z0050_DIMD_CHROMA_FUSION isChromaFusion = predData.isChromaFusion; @@ -904,6 +929,9 @@ PredictionUnit& PredictionUnit::operator=( const PredictionUnit& other ) for( uint32_t i = 0; i < MAX_NUM_CHANNEL_TYPE; i++ ) { intraDir[ i ] = other.intraDir[ i ]; +#if JVET_AB0155_SGPM + intraDir1[i] = other.intraDir1[i]; +#endif } #if JVET_Z0050_DIMD_CHROMA_FUSION 
isChromaFusion = other.isChromaFusion; diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h index e3ba3f5c7b1765e7dc772728fd4ed9a3fe458f50..270a98d8fc738d396766e903017c5c6d1cf5cba0 100644 --- a/source/Lib/CommonLib/Unit.h +++ b/source/Lib/CommonLib/Unit.h @@ -335,6 +335,15 @@ struct CodingUnit : public UnitArea bool timdIsBlended; int8_t timdFusionWeight[2]; #endif +#if JVET_AB0155_SGPM + int timdHor; + int timdVer; + bool sgpm; + int sgpmIdx; + int sgpmSplitDir; + int sgpmMode0; + int sgpmMode1; +#endif #if ENABLE_OBMC bool obmcFlag; bool isobmcMC; @@ -425,6 +434,9 @@ struct IntraPredictionData uint8_t intraNonMPM[NUM_NON_MPM_MODES]; #endif uint8_t intraDir[MAX_NUM_CHANNEL_TYPE]; +#if JVET_AB0155_SGPM + uint8_t intraDir1[MAX_NUM_CHANNEL_TYPE]; +#endif #if JVET_Z0050_DIMD_CHROMA_FUSION bool isChromaFusion; #endif diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index 3028d15f6cb83e4f5f7d6e1fcb2ab4d8bda04ef5..1fb99538d0c148935bfcd67999dbfe1832cb4d9a 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -1517,6 +1517,286 @@ void PU::getGeoIntraMPMs( const PredictionUnit &pu, uint8_t* mpm, uint8_t splitD mpm[numValidMPM] = PLANAR_IDX; } #endif +#if JVET_AB0155_SGPM +void PU::getSgpmIntraMPMs(const PredictionUnit &pu, uint8_t *mpm, uint8_t splitDir, uint8_t shape) +{ + bool includedMode[NUM_INTRA_MODE]; + memset(includedMode, false, sizeof(includedMode)); + + int numValidMPM = 0; + CodingUnit *cu = pu.cu; + bool timdDerived = !(cu->lwidth() * cu->lheight() > 1024); + if (timdDerived) + { + if (includedMode[MAP131TO67(cu->timdHor)] == false && cu->timdHor > DC_IDX) + { + mpm[numValidMPM] = MAP131TO67(cu->timdHor); + if (!includedMode[mpm[numValidMPM]]) + { + includedMode[mpm[numValidMPM++]] = true; + if (numValidMPM == SGPM_NUM_MPM) + { + return; + } + } + } + + if (includedMode[MAP131TO67(cu->timdVer)] == false && cu->timdVer > DC_IDX) + { + mpm[numValidMPM] = 
MAP131TO67(cu->timdVer); + if (!includedMode[mpm[numValidMPM]]) + { + includedMode[mpm[numValidMPM++]] = true; + if (numValidMPM == SGPM_NUM_MPM) + { + return; + } + } + } + } + + mpm[numValidMPM] = g_geoAngle2IntraAng[g_GeoParams[splitDir][0]]; + if (!includedMode[mpm[numValidMPM]]) + { + includedMode[mpm[numValidMPM++]] = true; + if (numValidMPM == SGPM_NUM_MPM) + { + return; + } + } + +#if ENABLE_DIMD + if (cu->slice->getSPS()->getUseDimd()) + { + if (cu->dimdMode != -1) + { + mpm[numValidMPM] = cu->dimdMode; + if (!includedMode[mpm[numValidMPM]]) + { + includedMode[mpm[numValidMPM++]] = true; + if (numValidMPM == SGPM_NUM_MPM) + { + return; + } + } + } + } +#endif + + const CompArea &area = pu.block(COMPONENT_Y); + const Position posA = area.topRight().offset(0, -1); + const Position posAR = area.topRight().offset(1, -1); + const Position posL = area.bottomLeft().offset(-1, 0); + const Position posBL = area.bottomLeft().offset(-1, 1); + const Position posAL = area.topLeft().offset(-1, -1); + + if (shape == GEO_TM_SHAPE_L || shape == GEO_TM_SHAPE_AL) + { + const PredictionUnit *puLeft = pu.cs->getPURestricted(posL, pu, CHANNEL_TYPE_LUMA); + if (puLeft && CU::isIntra(*puLeft->cu)) + { +#if JVET_W0123_TIMD_FUSION + mpm[numValidMPM] = puLeft->cu->timd ? MAP131TO67(PU::getIntraDirLuma(*puLeft)) : PU::getIntraDirLuma(*puLeft); +#else + mpm[numValidMPM] = PU::getIntraDirLuma(*puLeft); +#endif + if (!includedMode[mpm[numValidMPM]]) + { + includedMode[mpm[numValidMPM++]] = true; + if (numValidMPM == SGPM_NUM_MPM) + { + return; + } + } + } + } + + if (shape == GEO_TM_SHAPE_A || shape == GEO_TM_SHAPE_AL) + { + const PredictionUnit *puAbove = pu.cs->getPURestricted(posA, pu, CHANNEL_TYPE_LUMA); + if (puAbove && CU::isIntra(*puAbove->cu)) + { +#if JVET_W0123_TIMD_FUSION + mpm[numValidMPM] = puAbove->cu->timd ? 
MAP131TO67(PU::getIntraDirLuma(*puAbove)) : PU::getIntraDirLuma(*puAbove); +#else + mpm[numValidMPM] = PU::getIntraDirLuma(*puAbove); +#endif + if (!includedMode[mpm[numValidMPM]]) + { + includedMode[mpm[numValidMPM++]] = true; + if (numValidMPM == SGPM_NUM_MPM) + { + return; + } + } + } + } + +#if JVET_W0123_TIMD_FUSION + if (shape == GEO_TM_SHAPE_L || shape == GEO_TM_SHAPE_AL) + { + const PredictionUnit *puLeft = pu.cs->getPURestricted(posL, pu, CHANNEL_TYPE_LUMA); + if (puLeft && CU::isInter(*puLeft->cu)) + { + mpm[numValidMPM] = puLeft->getIpmInfo(posL); + if (!includedMode[mpm[numValidMPM]]) + { + includedMode[mpm[numValidMPM++]] = true; + if (numValidMPM == SGPM_NUM_MPM) + { + return; + } + } + } + } + + if (shape == GEO_TM_SHAPE_A || shape == GEO_TM_SHAPE_AL) + { + const PredictionUnit *puAbove = pu.cs->getPURestricted(posA, pu, CHANNEL_TYPE_LUMA); + if (puAbove && CU::isInter(*puAbove->cu)) + { + mpm[numValidMPM] = puAbove->getIpmInfo(posA); + if (!includedMode[mpm[numValidMPM]]) + { + includedMode[mpm[numValidMPM++]] = true; + if (numValidMPM == SGPM_NUM_MPM) + { + return; + } + } + } + } +#endif + + if (shape == GEO_TM_SHAPE_L || shape == GEO_TM_SHAPE_AL) + { + const PredictionUnit *puBelowLeft = pu.cs->getPURestricted(posBL, pu, CHANNEL_TYPE_LUMA); + if (puBelowLeft && CU::isIntra(*puBelowLeft->cu)) + { +#if JVET_W0123_TIMD_FUSION + mpm[numValidMPM] = + puBelowLeft->cu->timd ? 
MAP131TO67(PU::getIntraDirLuma(*puBelowLeft)) : PU::getIntraDirLuma(*puBelowLeft); +#else + mpm[numValidMPM] = PU::getIntraDirLuma(*puBelowLeft); +#endif + if (!includedMode[mpm[numValidMPM]]) + { + includedMode[mpm[numValidMPM++]] = true; + if (numValidMPM == SGPM_NUM_MPM) + { + return; + } + } + } + } + + if (shape == GEO_TM_SHAPE_A || shape == GEO_TM_SHAPE_AL) + { + const PredictionUnit *puAboveRight = pu.cs->getPURestricted(posAR, pu, CHANNEL_TYPE_LUMA); + if (puAboveRight && CU::isIntra(*puAboveRight->cu)) + { +#if JVET_W0123_TIMD_FUSION + mpm[numValidMPM] = + puAboveRight->cu->timd ? MAP131TO67(PU::getIntraDirLuma(*puAboveRight)) : PU::getIntraDirLuma(*puAboveRight); +#else + mpm[numValidMPM] = PU::getIntraDirLuma(*puAboveRight); +#endif + if (!includedMode[mpm[numValidMPM]]) + { + includedMode[mpm[numValidMPM++]] = true; + if (numValidMPM == SGPM_NUM_MPM) + { + return; + } + } + } + } + + { + const PredictionUnit *puAboveLeft = pu.cs->getPURestricted(posAL, pu, CHANNEL_TYPE_LUMA); + if (puAboveLeft && CU::isIntra(*puAboveLeft->cu)) + { +#if JVET_W0123_TIMD_FUSION + mpm[numValidMPM] = + puAboveLeft->cu->timd ? 
MAP131TO67(PU::getIntraDirLuma(*puAboveLeft)) : PU::getIntraDirLuma(*puAboveLeft); +#else + mpm[numValidMPM] = PU::getIntraDirLuma(*puAboveLeft); +#endif + if (!includedMode[mpm[numValidMPM]]) + { + includedMode[mpm[numValidMPM++]] = true; + if (numValidMPM == SGPM_NUM_MPM) + { + return; + } + } + } + } + +#if JVET_W0123_TIMD_FUSION + if (shape == GEO_TM_SHAPE_L || shape == GEO_TM_SHAPE_AL) + { + const PredictionUnit *puBelowLeft = pu.cs->getPURestricted(posBL, pu, CHANNEL_TYPE_LUMA); + if (puBelowLeft && CU::isInter(*puBelowLeft->cu)) + { + mpm[numValidMPM] = puBelowLeft->getIpmInfo(posBL); + if (!includedMode[mpm[numValidMPM]]) + { + includedMode[mpm[numValidMPM++]] = true; + if (numValidMPM == SGPM_NUM_MPM) + { + return; + } + } + } + } + + if (shape == GEO_TM_SHAPE_A || shape == GEO_TM_SHAPE_AL) + { + const PredictionUnit *puAboveRight = pu.cs->getPURestricted(posAR, pu, CHANNEL_TYPE_LUMA); + if (puAboveRight && CU::isInter(*puAboveRight->cu)) + { + mpm[numValidMPM] = puAboveRight->getIpmInfo(posAR); + if (!includedMode[mpm[numValidMPM]]) + { + includedMode[mpm[numValidMPM++]] = true; + if (numValidMPM == SGPM_NUM_MPM) + { + return; + } + } + } + } + + { + const PredictionUnit *puAboveLeft = pu.cs->getPURestricted(posAL, pu, CHANNEL_TYPE_LUMA); + if (puAboveLeft && CU::isInter(*puAboveLeft->cu)) + { + mpm[numValidMPM] = puAboveLeft->getIpmInfo(posAL); + if (!includedMode[mpm[numValidMPM]]) + { + includedMode[mpm[numValidMPM++]] = true; + if (numValidMPM == SGPM_NUM_MPM) + { + return; + } + } + } + } +#endif + + mpm[numValidMPM] = (mpm[0] > DIA_IDX) ? 
(mpm[0] - 32) : (mpm[0] + 32); + if (!includedMode[mpm[numValidMPM]]) + { + includedMode[mpm[numValidMPM++]] = true; + if (numValidMPM == SGPM_NUM_MPM) + { + return; + } + } + mpm[numValidMPM] = PLANAR_IDX; +} +#endif bool PU::isMIP(const PredictionUnit &pu, const ChannelType &chType) { @@ -1681,6 +1961,48 @@ bool PU::hasChromaFusionFlag(const PredictionUnit &pu, int intraMode) } #endif +#if JVET_AB0155_SGPM +bool PU::isSgpm(const PredictionUnit &pu, const ChannelType &chType) +{ + if (chType == CHANNEL_TYPE_LUMA) + { + // Default case if chType is omitted. + return pu.cu->sgpm; + } + else + { + return isDMChromaSgpm(pu) && (pu.intraDir[CHANNEL_TYPE_CHROMA] == DM_CHROMA_IDX); + } +} +bool PU::isDMChromaSgpm(const PredictionUnit &pu) +{ + return false; +} +#endif + +#if JVET_AB0155_SGPM +uint32_t PU::getIntraDirLuma(const PredictionUnit &pu, const int partIdx) +{ +#if JVET_V0130_INTRA_TMP + if (isMIP(pu) || isTmp(pu)) +#else + if (isMIP(pu)) +#endif + { + return PLANAR_IDX; + } + + else + { + if (partIdx) + { + return pu.intraDir1[CHANNEL_TYPE_LUMA]; + } + return pu.intraDir[CHANNEL_TYPE_LUMA]; + } +} +#else + uint32_t PU::getIntraDirLuma( const PredictionUnit &pu ) { #if JVET_V0130_INTRA_TMP @@ -1696,6 +2018,7 @@ uint32_t PU::getIntraDirLuma( const PredictionUnit &pu ) return pu.intraDir[CHANNEL_TYPE_LUMA]; } } +#endif void PU::getIntraChromaCandModes(const PredictionUnit &pu, unsigned modeList[NUM_CHROMA_MODE]) { @@ -1732,6 +2055,12 @@ void PU::getIntraChromaCandModes(const PredictionUnit &pu, unsigned modeList[NUM { return; } +#if JVET_AB0155_SGPM + if (isDMChromaSgpm(pu)) + { + return; + } +#endif const uint32_t lumaMode = getCoLocatedIntraLumaMode(pu); for (int i = 0; i < 4; i++) @@ -1824,8 +2153,32 @@ bool PU::isChromaIntraModeCrossCheckMode( const PredictionUnit &pu ) return !pu.cu->bdpcmModeChroma && pu.intraDir[CHANNEL_TYPE_CHROMA] == DM_CHROMA_IDX; } +#if JVET_AB0155_SGPM +uint32_t PU::getFinalIntraMode(const PredictionUnit &pu, const ChannelType &chType, 
const int partIdx) +{ + if (partIdx) + { + uint32_t uiIntraMode = pu.intraDir[chType]; + if (isLuma(chType)) + { + uiIntraMode = pu.intraDir1[chType]; + } + + if (uiIntraMode == DM_CHROMA_IDX && !isLuma(chType)) + { + uiIntraMode = getCoLocatedIntraLumaMode(pu, 1); + } + if (pu.chromaFormat == CHROMA_422 && !isLuma(chType) + && uiIntraMode < NUM_LUMA_MODE) // map directional, planar and dc + { + uiIntraMode = g_chroma422IntraAngleMappingTable[uiIntraMode]; + } + return uiIntraMode; + } +#else uint32_t PU::getFinalIntraMode( const PredictionUnit &pu, const ChannelType &chType ) { +#endif uint32_t uiIntraMode = pu.intraDir[chType]; if( uiIntraMode == DM_CHROMA_IDX && !isLuma( chType ) ) @@ -1860,6 +2213,18 @@ const PredictionUnit &PU::getCoLocatedLumaPU(const PredictionUnit &pu) return lumaPU; } +#if JVET_AB0155_SGPM +uint32_t PU::getCoLocatedIntraLumaMode(const PredictionUnit &pu, const int partIdx) +{ +#if JVET_W0123_TIMD_FUSION + if (PU::getCoLocatedLumaPU(pu).cu->timd) + { + return MAP131TO67(PU::getIntraDirLuma(PU::getCoLocatedLumaPU(pu), partIdx)); + } +#endif + return PU::getIntraDirLuma(PU::getCoLocatedLumaPU(pu), partIdx); +} +#else uint32_t PU::getCoLocatedIntraLumaMode(const PredictionUnit &pu) { #if JVET_W0123_TIMD_FUSION @@ -1870,6 +2235,7 @@ uint32_t PU::getCoLocatedIntraLumaMode(const PredictionUnit &pu) #endif return PU::getIntraDirLuma(PU::getCoLocatedLumaPU(pu)); } +#endif int PU::getWideAngle( const TransformUnit &tu, const uint32_t dirMode, const ComponentID compID ) { @@ -12518,6 +12884,14 @@ void PU::spanMotionInfo2( PredictionUnit &pu, const MergeCtx &mrgCtx ) void PU::spanIpmInfoIntra( PredictionUnit &pu) { +#if JVET_AB0155_SGPM + if (pu.cu->sgpm) + { + spanIpmInfoSgpm(pu); + return; + } +#endif + int ipm = PU::getIntraDirLuma(pu); if (pu.cu->timd) { @@ -12526,6 +12900,58 @@ void PU::spanIpmInfoIntra( PredictionUnit &pu) IpmBuf ib = pu.getIpmBuf(); ib.fill(ipm); } +#if JVET_AB0155_SGPM +void PU::spanIpmInfoSgpm(PredictionUnit &pu) +{ + int 
sgpmMode0 = pu.cu->sgpmMode0; + int sgpmMode1 = pu.cu->sgpmMode1; + int splitDir = pu.cu->sgpmSplitDir; + + int16_t angle = g_GeoParams[splitDir][0]; + int tpmMask = 0; + int lookUpY = 0, motionIdx = 0; + bool isFlip = angle >= 13 && angle <= 27; + int distanceIdx = g_GeoParams[splitDir][1]; + int distanceX = angle; + int distanceY = (distanceX + (GEO_NUM_ANGLES >> 2)) % GEO_NUM_ANGLES; + int offsetX = (-(int)pu.lwidth()) >> 1; + int offsetY = (-(int)pu.lheight()) >> 1; + + if (distanceIdx > 0) + { + if( angle % 16 == 8 || ( angle % 16 != 0 && pu.lheight() >= pu.lwidth() ) ) + { + offsetY += angle < 16 ? ( ( distanceIdx * pu.lheight() ) >> 3 ) : -( int ) ( ( distanceIdx * pu.lheight() ) >> 3 ); + } + else + { + offsetX += angle < 16 ? ( ( distanceIdx * pu.lwidth() ) >> 3 ) : -( int ) ( ( distanceIdx * pu.lwidth() ) >> 3 ); + } + } + + IpmBuf ib = pu.getIpmBuf(); + uint8_t *ipmSgpm = ib.buf; + + for (int y = 0; y < ib.height; y++) + { + lookUpY = (((4 * y + offsetY) << 1) + 5) * g_Dis[distanceY]; + for (int x = 0; x < ib.width; x++) + { + motionIdx = (((4 * x + offsetX) << 1) + 5) * g_Dis[distanceX] + lookUpY; + tpmMask = motionIdx <= 0 ? 
(1 - isFlip) : isFlip; + if (tpmMask == 0) + { + ipmSgpm[x] = sgpmMode0; /* index by x: ipmSgpm points at the start of row y, so each column gets its own partition's mode */ + } + else + { + ipmSgpm[x] = sgpmMode1; + } + } + ipmSgpm += ib.stride; + } +} +#endif #if RPR_ENABLE void scalePositionInRef( PredictionUnit& pu, const PPS& pps, RefPicList refList, int refIdx, Position& PosY ) diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h index 9227d158ff35fa4cbd208110eb107ab5bfe85bc0..8b9494ac25c15d7ce8f6ff95b658e4f79bc1b63f 100644 --- a/source/Lib/CommonLib/UnitTools.h +++ b/source/Lib/CommonLib/UnitTools.h @@ -151,24 +151,43 @@ namespace PU void getGeoIntraMPMs( const PredictionUnit &pu, uint8_t* mpm, uint8_t splitDir, uint8_t shape, bool doInit, bool doInitAL = true, bool doInitA = true, bool doInitL = true); #endif void getGeoIntraMPMs( const PredictionUnit &pu, uint8_t* mpm, uint8_t splitDir, uint8_t shape ); +#endif +#if JVET_AB0155_SGPM + void getSgpmIntraMPMs(const PredictionUnit &pu, uint8_t *mpm, uint8_t splitDir, uint8_t shape); #endif bool isMIP (const PredictionUnit &pu, const ChannelType &chType = CHANNEL_TYPE_LUMA); #if JVET_V0130_INTRA_TMP bool isTmp(const PredictionUnit& pu, const ChannelType& chType = CHANNEL_TYPE_LUMA); #endif bool isDMChromaMIP (const PredictionUnit &pu); +#if JVET_AB0155_SGPM + bool isSgpm(const PredictionUnit &pu, const ChannelType &chType = CHANNEL_TYPE_LUMA); + bool isDMChromaSgpm(const PredictionUnit &pu); +#endif +#if JVET_AB0155_SGPM + uint32_t getIntraDirLuma(const PredictionUnit &pu, const int partIdx = 0); +#else uint32_t getIntraDirLuma (const PredictionUnit &pu); +#endif void getIntraChromaCandModes(const PredictionUnit &pu, unsigned modeList[NUM_CHROMA_MODE]); const PredictionUnit &getCoLocatedLumaPU(const PredictionUnit &pu); +#if JVET_AB0155_SGPM + uint32_t getFinalIntraMode (const PredictionUnit &pu, const ChannelType &chType, const int partIdx = 0); +#else uint32_t getFinalIntraMode (const PredictionUnit &pu, const ChannelType &chType); +#endif #if JVET_W0119_LFNST_EXTENSION int
getLFNSTMatrixDim ( int width, int height ); bool getUseLFNST8 ( int width, int height ); uint8_t getLFNSTIdx ( int intraMode, int mtsMode = 0 ); bool getUseLFNST16 ( int width, int height ); #endif +#if JVET_AB0155_SGPM + uint32_t getCoLocatedIntraLumaMode(const PredictionUnit &pu, const int partIdx = 0); +#else uint32_t getCoLocatedIntraLumaMode (const PredictionUnit &pu); +#endif int getWideAngle ( const TransformUnit &tu, const uint32_t dirMode, const ComponentID compID ); #if MULTI_PASS_DMVR || JVET_W0097_GPM_MMVD_TM uint32_t getBDMVRMvdThreshold (const PredictionUnit &pu); @@ -344,6 +363,9 @@ namespace PU void spanIpmInfoIntra ( PredictionUnit &pu ); void spanIpmInfoInter ( PredictionUnit &pu, MotionBuf &mb, IpmBuf &ib ); #endif +#if JVET_AB0155_SGPM + void spanIpmInfoSgpm ( PredictionUnit &pu); +#endif #if !JVET_Z0054_BLK_REF_PIC_REORDER void applyImv ( PredictionUnit &pu, MergeCtx &mrgCtx, InterPrediction *interPred = NULL ); #endif diff --git a/source/Lib/CommonLib/x86/InterpolationFilterX86.h b/source/Lib/CommonLib/x86/InterpolationFilterX86.h index 94a978ed68d72053d4d8abb661c894c8bb61550d..0149463d0b37780a3a18f8b7d53dba2c6be58b35 100644 --- a/source/Lib/CommonLib/x86/InterpolationFilterX86.h +++ b/source/Lib/CommonLib/x86/InterpolationFilterX86.h @@ -3459,6 +3459,717 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel } } +#if JVET_AB0155_SGPM + +template<X86_VEXT vext> +int xSadTM_SSE(const PredictionUnit &pu, const int width, const int height, const int templateWidth, + const int templateHeight, const ComponentID compIdx, PelBuf &predBuf, PelBuf &recBuf, + PelBuf &adBuf) +{ + int sad = 0; + int32_t iPredStride = predBuf.stride; + int32_t iRecStride = recBuf.stride; + int32_t iAdStride = adBuf.stride; + + // top template + Pel *piPred = predBuf.buf + templateWidth; + // start point of predBuf is (-templateWidth, -templateHeight) of current block + Pel *piAd = adBuf.buf + templateWidth; + Pel *piRec = recBuf.buf - 
templateHeight * iRecStride; // start point of recBuf is (0,0) of current block + + if (width == 4) + { + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + // for luma, to be confirmed + for (int y = 0; y < templateHeight; y++) + { + __m128i vPred = _mm_loadl_epi64((__m128i *) (piPred)); + __m128i vRec = _mm_loadl_epi64((__m128i *) (piRec)); + __m128i vAd = _mm_abs_epi16(_mm_sub_epi16(vRec, vPred)); + _mm_storel_epi64((__m128i *) (piAd), vAd); + __m128i vsumtemp = _mm_unpacklo_epi16(vAd, vzero); + vsum32 = _mm_add_epi32(vsum32, vsumtemp); + piPred += iPredStride; + piAd += iAdStride; + piRec += iRecStride; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + sad = _mm_cvtsi128_si32(vsum32); + } +#if USE_AVX2 + else if (0 == (width % 16)) + { + __m256i vzero = _mm256_setzero_si256(); + __m256i vsum32 = vzero; + for (int y = 0; y < templateHeight; y++) + { + __m256i vsum16 = vzero; + for (int x = 0; x < width; x += 16) + { + __m256i vPred = _mm256_lddqu_si256((__m256i *) (piPred + x)); // why not aligned with 128/256 bit boundaries + __m256i vRec = _mm256_lddqu_si256((__m256i *) (piRec + x)); + __m256i vAd = _mm256_abs_epi16(_mm256_sub_epi16(vRec, vPred)); + _mm256_storeu_si256((__m256i *) (piAd + x), vAd); + + vsum16 = _mm256_add_epi16(vsum16, vAd); + } + __m256i vsumtemp = _mm256_add_epi32(_mm256_unpacklo_epi16(vsum16, vzero), _mm256_unpackhi_epi16(vsum16, vzero)); + vsum32 = _mm256_add_epi32(vsum32, vsumtemp); + piPred += iPredStride; + piAd += iAdStride; + piRec += iRecStride; + } + vsum32 = _mm256_hadd_epi32(vsum32, vzero); + vsum32 = _mm256_hadd_epi32(vsum32, vzero); + sad = _mm_cvtsi128_si32(_mm256_castsi256_si128(vsum32)) + + _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_permute2x128_si256(vsum32, vsum32, 0x11))); + } +#endif + else + { + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + for (int y = 0; y < 
templateHeight; y++) + { + __m128i vsum16 = vzero; + for (int x = 0; x < width; x += 8) + { + __m128i vPred = _mm_lddqu_si128((__m128i *) (piPred + x)); + __m128i vRec = _mm_lddqu_si128((__m128i *) (piRec + x)); + __m128i vAd = _mm_abs_epi16(_mm_sub_epi16(vRec, vPred)); + _mm_storeu_si128((__m128i *) (piAd + x), vAd); + vsum16 = _mm_add_epi16(vsum16, vAd); + } + __m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero)); + vsum32 = _mm_add_epi32(vsum32, vsumtemp); + piPred += iPredStride; + piAd += iAdStride; + piRec += iRecStride; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + sad = _mm_cvtsi128_si32(vsum32); + } + + // left template + piPred = predBuf.buf + templateHeight * iPredStride; + // start point of predBuf is (-templateWidth, -templateHeight) of current block + piAd = adBuf.buf + templateHeight * iAdStride; + piRec = recBuf.buf - templateWidth; // start point of recBuf is (0,0) of current block + + if (templateWidth == 4) + { + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + // for luma, to be confirmed + for (int y = 0; y < height; y++) + { + __m128i vPred = _mm_loadl_epi64((__m128i *) (piPred)); + __m128i vRec = _mm_loadl_epi64((__m128i *) (piRec)); + __m128i vAd = _mm_abs_epi16(_mm_sub_epi16(vRec, vPred)); + _mm_storel_epi64((__m128i *) (piAd), vAd); + __m128i vsumtemp = _mm_unpacklo_epi16(vAd, vzero); + vsum32 = _mm_add_epi32(vsum32, vsumtemp); + piPred += iPredStride; + piAd += iAdStride; + piRec += iRecStride; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + sad += _mm_cvtsi128_si32(vsum32); + } +#if USE_AVX2 + else if (0 == (templateWidth % 16)) + { + __m256i vzero = _mm256_setzero_si256(); + __m256i vsum32 = vzero; + for (int y = 0; y < height; y++) + { + 
__m256i vsum16 = vzero; + for (int x = 0; x < templateWidth; x += 16) + { + __m256i vPred = _mm256_lddqu_si256((__m256i *) (piPred + x)); // why not aligned with 128/256 bit boundaries + __m256i vRec = _mm256_lddqu_si256((__m256i *) (piRec + x)); + __m256i vAd = _mm256_abs_epi16(_mm256_sub_epi16(vRec, vPred)); + _mm256_storeu_si256((__m256i *) (piAd + x), vAd); + + vsum16 = _mm256_add_epi16(vsum16, vAd); + } + __m256i vsumtemp = _mm256_add_epi32(_mm256_unpacklo_epi16(vsum16, vzero), _mm256_unpackhi_epi16(vsum16, vzero)); + vsum32 = _mm256_add_epi32(vsum32, vsumtemp); + piPred += iPredStride; + piAd += iAdStride; + piRec += iRecStride; + } + vsum32 = _mm256_hadd_epi32(vsum32, vzero); + vsum32 = _mm256_hadd_epi32(vsum32, vzero); + sad += _mm_cvtsi128_si32(_mm256_castsi256_si128(vsum32)) + + _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_permute2x128_si256(vsum32, vsum32, 0x11))); + } +#endif + else if (0 == (templateWidth % 8)) + { + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + for (int y = 0; y < height; y++) + { + __m128i vsum16 = vzero; + for (int x = 0; x < templateWidth; x += 8) + { + __m128i vPred = _mm_lddqu_si128((__m128i *) (piPred + x)); + __m128i vRec = _mm_lddqu_si128((__m128i *) (piRec + x)); + __m128i vAd = _mm_abs_epi16(_mm_sub_epi16(vRec, vPred)); + _mm_storeu_si128((__m128i *) (piAd + x), vAd); + vsum16 = _mm_add_epi16(vsum16, vAd); + } + __m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero)); + vsum32 = _mm_add_epi32(vsum32, vsumtemp); + piPred += iPredStride; + piAd += iAdStride; + piRec += iRecStride; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + sad += _mm_cvtsi128_si32(vsum32); + } + else + { + for (int y = 0; y < height; y++) + { + for (int x = 0; x < templateWidth; x++) + { + *piAd = abs(*piRec - *piPred); + sad += *piAd; + piRec++; + piPred++; + piAd++; + } + 
+ piPred += (iPredStride - templateWidth); + piAd += (iAdStride - templateWidth); + piRec += (iRecStride - templateWidth); + } + } + + return sad; +} + +template<X86_VEXT vext> +int xSgpmSadTM_SSE(const PredictionUnit &pu, const int width, const int height, const int templateWidth, + const int templateHeight, const ComponentID compIdx, const uint8_t splitDir, PelBuf &adBuf) +{ + int sum = 0; + int16_t wIdx = floorLog2(pu.lwidth()) - GEO_MIN_CU_LOG2_EX; + int16_t hIdx = floorLog2(pu.lheight()) - GEO_MIN_CU_LOG2_EX; + int16_t angle = g_GeoParams[splitDir][0]; + int16_t stepY = 0; + int16_t stepX = 1; + int16_t *weightMask = nullptr; + + if (g_angle2mirror[angle] == 2) + { + stepY = -GEO_WEIGHT_MASK_SIZE_EXT; + weightMask = &g_globalGeoWeightsTpl[g_angle2mask[angle]] + [(GEO_WEIGHT_MASK_SIZE_EXT - 1 + - g_weightOffsetEx[splitDir][hIdx][wIdx][1] - GEO_TM_ADDED_WEIGHT_MASK_SIZE) + * GEO_WEIGHT_MASK_SIZE_EXT + + g_weightOffsetEx[splitDir][hIdx][wIdx][0] + GEO_TM_ADDED_WEIGHT_MASK_SIZE]; + } + else if (g_angle2mirror[angle] == 1) + { + stepX = -1; + stepY = GEO_WEIGHT_MASK_SIZE_EXT; + weightMask = &g_globalGeoWeightsTpl[g_angle2mask[angle]] + [(g_weightOffsetEx[splitDir][hIdx][wIdx][1] + GEO_TM_ADDED_WEIGHT_MASK_SIZE) + * GEO_WEIGHT_MASK_SIZE_EXT + + (GEO_WEIGHT_MASK_SIZE_EXT - 1 - g_weightOffsetEx[splitDir][hIdx][wIdx][0] + - GEO_TM_ADDED_WEIGHT_MASK_SIZE)]; + } + else + { + stepY = GEO_WEIGHT_MASK_SIZE_EXT; + weightMask = &g_globalGeoWeightsTpl[g_angle2mask[angle]] + [(g_weightOffsetEx[splitDir][hIdx][wIdx][1] + GEO_TM_ADDED_WEIGHT_MASK_SIZE) + * GEO_WEIGHT_MASK_SIZE_EXT + + g_weightOffsetEx[splitDir][hIdx][wIdx][0] + GEO_TM_ADDED_WEIGHT_MASK_SIZE]; + } + + int32_t iAdStride = adBuf.stride; + + if (compIdx != COMPONENT_Y && pu.chromaFormat == CHROMA_420) + { + stepY <<= 1; + } + + // top template + Pel *piAd = adBuf.buf + templateWidth; // start point of adBuf is (-templateWidth, -templateHeight) of current block + int16_t *weightBackup = weightMask; + weightMask = 
weightMask - templateHeight * stepY; + if (width == 4) + { + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + for (int y = 0; y < templateHeight; y++) + { + __m128i vAd = _mm_loadl_epi64((__m128i *) (piAd)); + __m128i vMask; + + if (g_angle2mirror[angle] == 1) + { + vMask = _mm_loadl_epi64((__m128i *) (weightMask - (4 - 1))); + const __m128i shuffle_mask = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 1, 0, 3, 2, 5, 4, 7, 6); + vMask = _mm_shuffle_epi8(vMask, shuffle_mask); + } + else + { + vMask = _mm_loadl_epi64((__m128i *) weightMask); + } + vsum32 = _mm_add_epi32(vsum32, _mm_madd_epi16(vMask, vAd)); + + piAd += iAdStride; + weightMask += stepY; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + sum = _mm_cvtsi128_si32(vsum32); + } +#if USE_AVX2 + else if (0 == (width % 16)) + { + __m256i vzero = _mm256_setzero_si256(); + __m256i vsum32 = vzero; + for (int y = 0; y < templateHeight; y++) + { + for (int x = 0; x < width; x += 16) + { + __m256i vAd = _mm256_lddqu_si256((__m256i *) (piAd + x)); + + __m256i vMask; + + if (g_angle2mirror[angle] == 1) + { + vMask = _mm256_lddqu_si256((__m256i *) (weightMask - x - (16 - 1))); + const __m256i shuffle_mask = _mm256_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, + 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + vMask = _mm256_shuffle_epi8(vMask, shuffle_mask); + vMask = _mm256_permute4x64_epi64(vMask, _MM_SHUFFLE(1, 0, 3, 2)); + } + else + { + vMask = _mm256_lddqu_si256((__m256i *) (weightMask + x)); + } + vsum32 = _mm256_add_epi32(vsum32, _mm256_madd_epi16(vMask, vAd)); + } + piAd += iAdStride; + weightMask += stepY; + } + vsum32 = _mm256_hadd_epi32(vsum32, vzero); + vsum32 = _mm256_hadd_epi32(vsum32, vzero); + sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(vsum32)) + + _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_permute2x128_si256(vsum32, vsum32, 0x11))); + } +#endif + else + { + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + 
for (int y = 0; y < templateHeight; y++) + { + for (int x = 0; x < width; x += 8) + { + __m128i vAd = _mm_lddqu_si128((__m128i *) (piAd + x)); + __m128i vMask; + + if (g_angle2mirror[angle] == 1) + { + vMask = _mm_lddqu_si128((__m128i *) (weightMask - x - (8 - 1))); + const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + vMask = _mm_shuffle_epi8(vMask, shuffle_mask); + } + else + { + vMask = _mm_lddqu_si128((__m128i *) (weightMask + x)); + } + vsum32 = _mm_add_epi32(vsum32, _mm_madd_epi16(vMask, vAd)); + } + piAd += iAdStride; + weightMask += stepY; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + sum = _mm_cvtsi128_si32(vsum32); + } + + // left template + piAd = adBuf.buf + templateHeight * iAdStride; + weightMask = weightBackup - templateWidth * stepX; + if (templateWidth == 4) + { + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + for (int y = 0; y < height; y++) + { + __m128i vAd = _mm_loadl_epi64((__m128i *) (piAd)); + __m128i vMask; + + if (g_angle2mirror[angle] == 1) + { + vMask = _mm_loadl_epi64((__m128i *) (weightMask - (4 - 1))); + const __m128i shuffle_mask = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 1, 0, 3, 2, 5, 4, 7, 6); + vMask = _mm_shuffle_epi8(vMask, shuffle_mask); + } + else + { + vMask = _mm_loadl_epi64((__m128i *) weightMask); + } + vsum32 = _mm_add_epi32(vsum32, _mm_madd_epi16(vMask, vAd)); + + piAd += iAdStride; + weightMask += stepY; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + sum += _mm_cvtsi128_si32(vsum32); + } +#if USE_AVX2 + else if (0 == (templateWidth % 16)) + { + __m256i vzero = _mm256_setzero_si256(); + __m256i vsum32 = vzero; + for (int y = 0; y < height; y++) + { + for (int x = 0; x < templateWidth; x += 16) + { + __m256i vAd = _mm256_lddqu_si256((__m256i *) (piAd + x)); + + __m256i vMask; + + if 
(g_angle2mirror[angle] == 1) + { + vMask = _mm256_lddqu_si256((__m256i *) (weightMask - x - (16 - 1))); + const __m256i shuffle_mask = _mm256_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, + 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + vMask = _mm256_shuffle_epi8(vMask, shuffle_mask); + vMask = _mm256_permute4x64_epi64(vMask, _MM_SHUFFLE(1, 0, 3, 2)); + } + else + { + vMask = _mm256_lddqu_si256((__m256i *) (weightMask + x)); + } + vsum32 = _mm256_add_epi32(vsum32, _mm256_madd_epi16(vMask, vAd)); + } + piAd += iAdStride; + weightMask += stepY; + } + vsum32 = _mm256_hadd_epi32(vsum32, vzero); + vsum32 = _mm256_hadd_epi32(vsum32, vzero); + sum += _mm_cvtsi128_si32(_mm256_castsi256_si128(vsum32)) + + _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_permute2x128_si256(vsum32, vsum32, 0x11))); + } +#endif + else if (0 == (templateWidth % 8)) + { + __m128i vzero = _mm_setzero_si128(); + __m128i vsum32 = vzero; + for (int y = 0; y < height; y++) + { + for (int x = 0; x < templateWidth; x += 8) + { + __m128i vAd = _mm_lddqu_si128((__m128i *) (piAd + x)); + __m128i vMask; + + if (g_angle2mirror[angle] == 1) + { + vMask = _mm_lddqu_si128((__m128i *) (weightMask - x - (8 - 1))); + const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + vMask = _mm_shuffle_epi8(vMask, shuffle_mask); + } + else + { + vMask = _mm_lddqu_si128((__m128i *) (weightMask + x)); + } + vsum32 = _mm_add_epi32(vsum32, _mm_madd_epi16(vMask, vAd)); + } + piAd += iAdStride; + weightMask += stepY; + } + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e)); // 01001110 + vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1)); // 10110001 + sum += _mm_cvtsi128_si32(vsum32); + } + else + { + for (int y = 0; y < height; y++) + { + for (int x = 0; x < templateWidth; x++) + { + sum += *piAd * (*weightMask); + piAd++; + weightMask += stepX; + } + + piAd += (iAdStride - templateWidth); + weightMask += (stepY - templateWidth * 
stepX); + } + } + + return sum; +} + +template<X86_VEXT vext> +void xWeightedSgpm_SSE(const PredictionUnit &pu, const uint32_t width, const uint32_t height, + const ComponentID compIdx, const uint8_t splitDir, PelBuf &predDst, PelBuf &predSrc0, + PelBuf &predSrc1) +{ + Pel * dst = predDst.buf; + Pel * src0 = predSrc0.buf; + Pel * src1 = predSrc1.buf; + int32_t strideDst = predDst.stride; + int32_t strideSrc0 = predSrc0.stride; + int32_t strideSrc1 = predSrc1.stride; + + //const char log2WeightBase = 3; + const ClpRng clpRng = pu.cu->slice->clpRngs().comp[compIdx]; + + const int32_t shiftWeighted = 5; + const int32_t offsetWeighted = 16; + int16_t wIdx = floorLog2(pu.lwidth()) - GEO_MIN_CU_LOG2_EX; + int16_t hIdx = floorLog2(pu.lheight()) - GEO_MIN_CU_LOG2_EX; + int16_t angle = g_GeoParams[splitDir][0]; + int16_t stepY = 0; + int16_t *weight = nullptr; + + if (g_angle2mirror[angle] == 2) + { + stepY = -GEO_WEIGHT_MASK_SIZE; + weight = + &g_globalGeoWeights[GET_SGPM_BLD_IDX(pu.lwidth(), pu.lheight())][g_angle2mask[angle]] + [(GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffsetEx[splitDir][hIdx][wIdx][1]) + * GEO_WEIGHT_MASK_SIZE + + g_weightOffsetEx[splitDir][hIdx][wIdx][0]]; + } + else if (g_angle2mirror[angle] == 1) + { + stepY = GEO_WEIGHT_MASK_SIZE; + weight = + &g_globalGeoWeights[GET_SGPM_BLD_IDX(pu.lwidth(), pu.lheight())][g_angle2mask[angle]] + [g_weightOffsetEx[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + + (GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffsetEx[splitDir][hIdx][wIdx][0])]; + } + else + { + stepY = GEO_WEIGHT_MASK_SIZE; + weight = + &g_globalGeoWeights[GET_SGPM_BLD_IDX(pu.lwidth(), pu.lheight())][g_angle2mask[angle]] + [g_weightOffsetEx[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + + g_weightOffsetEx[splitDir][hIdx][wIdx][0]]; + } + const __m128i mmEight = _mm_set1_epi16(32); + const __m128i mmOffset = _mm_set1_epi32(offsetWeighted); + const __m128i mmShift = _mm_cvtsi32_si128(shiftWeighted); + const __m128i mmMin = _mm_set1_epi16(clpRng.min); + const 
__m128i mmMax = _mm_set1_epi16(clpRng.max); + + if (compIdx != COMPONENT_Y && pu.chromaFormat == CHROMA_420) + stepY <<= 1; + if (width == 4) + { + // for luma, to be confirmed + for (int y = 0; y < height; y++) + { + __m128i s0 = _mm_loadl_epi64((__m128i *) (src0)); + __m128i s1 = _mm_loadl_epi64((__m128i *) (src1)); + __m128i w0; + if (compIdx != COMPONENT_Y && pu.chromaFormat != CHROMA_444) + { + if (g_angle2mirror[angle] == 1) + { + w0 = _mm_loadu_si128((__m128i *) (weight - (8 - 1))); + const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + w0 = _mm_shuffle_epi8(w0, shuffle_mask); + } + else + { + w0 = _mm_loadu_si128((__m128i *) (weight)); + } + w0 = _mm_shuffle_epi8(w0, _mm_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0)); + } + else + { + if (g_angle2mirror[angle] == 1) + { + w0 = _mm_loadl_epi64((__m128i *) (weight - (4 - 1))); + const __m128i shuffle_mask = _mm_set_epi8(15,14,13,12,11,10,9,8,1,0,3,2,5,4,7,6); + w0 = _mm_shuffle_epi8(w0, shuffle_mask); + } + else + { + w0 = _mm_loadl_epi64((__m128i *) weight); + } + } + + __m128i w1 = _mm_sub_epi16(mmEight, w0); + s0 = _mm_unpacklo_epi16(s0, s1); + w0 = _mm_unpacklo_epi16(w0, w1); + s0 = _mm_add_epi32(_mm_madd_epi16(s0, w0), mmOffset); + s0 = _mm_sra_epi32(s0, mmShift); + s0 = _mm_packs_epi32(s0, s0); + s0 = _mm_min_epi16(mmMax, _mm_max_epi16(s0, mmMin)); + _mm_storel_epi64((__m128i *) (dst), s0); + dst += strideDst; + src0 += strideSrc0; + src1 += strideSrc1; + weight += stepY; + } + } +#if USE_AVX2 + else if (0 == (width % 16)) + { + const __m256i mmEightAVX2 = _mm256_set1_epi16(32); + const __m256i mmOffsetAVX2 = _mm256_set1_epi32(offsetWeighted); + const __m256i mmMinAVX2 = _mm256_set1_epi16(clpRng.min); + const __m256i mmMaxAVX2 = _mm256_set1_epi16(clpRng.max); + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x += 16) + { + __m256i s0 = _mm256_lddqu_si256((__m256i *) (src0 + x)); // why not aligned with 128/256 bit boundaries + 
__m256i s1 = _mm256_lddqu_si256((__m256i *) (src1 + x)); + + __m256i w0 = _mm256_lddqu_si256((__m256i *) (weight + x)); + if (compIdx != COMPONENT_Y && pu.chromaFormat != CHROMA_444) + { + const __m256i mask = _mm256_set_epi16(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); + __m256i w0p0, w0p1; + if (g_angle2mirror[angle] == 1) + { + w0p0 = _mm256_lddqu_si256( + (__m256i *) (weight - (x << 1) - (16 - 1))); // first sub-sample the required weights. + w0p1 = _mm256_lddqu_si256((__m256i *) (weight - (x << 1) - 16 - (16 - 1))); + const __m256i shuffle_mask = _mm256_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, + 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + w0p0 = _mm256_shuffle_epi8(w0p0, shuffle_mask); + w0p0 = _mm256_permute4x64_epi64(w0p0, _MM_SHUFFLE(1, 0, 3, 2)); + w0p1 = _mm256_shuffle_epi8(w0p1, shuffle_mask); + w0p1 = _mm256_permute4x64_epi64(w0p1, _MM_SHUFFLE(1, 0, 3, 2)); + } + else + { + w0p0 = _mm256_lddqu_si256((__m256i *) (weight + (x << 1))); // first sub-sample the required weights. 
+ w0p1 = _mm256_lddqu_si256((__m256i *) (weight + (x << 1) + 16)); + } + w0p0 = _mm256_mullo_epi16(w0p0, mask); + w0p1 = _mm256_mullo_epi16(w0p1, mask); + w0 = _mm256_packs_epi16(w0p0, w0p1); + w0 = _mm256_permute4x64_epi64(w0, _MM_SHUFFLE(3, 1, 2, 0)); + } + else + { + if (g_angle2mirror[angle] == 1) + { + w0 = _mm256_lddqu_si256((__m256i *) (weight - x - (16 - 1))); + const __m256i shuffle_mask = _mm256_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, + 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + w0 = _mm256_shuffle_epi8(w0, shuffle_mask); + w0 = _mm256_permute4x64_epi64(w0, _MM_SHUFFLE(1, 0, 3, 2)); + } + else + { + w0 = _mm256_lddqu_si256((__m256i *) (weight + x)); + } + } + __m256i w1 = _mm256_sub_epi16(mmEightAVX2, w0); + + __m256i s0tmp = _mm256_unpacklo_epi16(s0, s1); + __m256i w0tmp = _mm256_unpacklo_epi16(w0, w1); + s0tmp = _mm256_add_epi32(_mm256_madd_epi16(s0tmp, w0tmp), mmOffsetAVX2); + s0tmp = _mm256_sra_epi32(s0tmp, mmShift); + + s0 = _mm256_unpackhi_epi16(s0, s1); + w0 = _mm256_unpackhi_epi16(w0, w1); + s0 = _mm256_add_epi32(_mm256_madd_epi16(s0, w0), mmOffsetAVX2); + s0 = _mm256_sra_epi32(s0, mmShift); + + s0 = _mm256_packs_epi32(s0tmp, s0); + s0 = _mm256_min_epi16(mmMaxAVX2, _mm256_max_epi16(s0, mmMinAVX2)); + _mm256_storeu_si256((__m256i *) (dst + x), s0); + } + dst += strideDst; + src0 += strideSrc0; + src1 += strideSrc1; + weight += stepY; + } + } +#endif + else + { + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x += 8) + { + __m128i s0 = _mm_lddqu_si128((__m128i *) (src0 + x)); + __m128i s1 = _mm_lddqu_si128((__m128i *) (src1 + x)); + __m128i w0; + if (compIdx != COMPONENT_Y && pu.chromaFormat != CHROMA_444) + { + const __m128i mask = _mm_set_epi16(0, 1, 0, 1, 0, 1, 0, 1); + __m128i w0p0, w0p1; + if (g_angle2mirror[angle] == 1) + { + w0p0 = + _mm_lddqu_si128((__m128i *) (weight - (x << 1) - (8 - 1))); // first sub-sample the required weights. 
+ w0p1 = _mm_lddqu_si128((__m128i *) (weight - (x << 1) - 8 - (8 - 1))); + const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + w0p0 = _mm_shuffle_epi8(w0p0, shuffle_mask); + w0p1 = _mm_shuffle_epi8(w0p1, shuffle_mask); + } + else + { + w0p0 = _mm_lddqu_si128((__m128i *) (weight + (x << 1))); // first sub-sample the required weights. + w0p1 = _mm_lddqu_si128((__m128i *) (weight + (x << 1) + 8)); + } + w0p0 = _mm_mullo_epi16(w0p0, mask); + w0p1 = _mm_mullo_epi16(w0p1, mask); + w0 = _mm_packs_epi32(w0p0, w0p1); + } + else + { + if (g_angle2mirror[angle] == 1) + { + w0 = _mm_lddqu_si128((__m128i *) (weight - x - (8 - 1))); + const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + w0 = _mm_shuffle_epi8(w0, shuffle_mask); + } + else + { + w0 = _mm_lddqu_si128((__m128i *) (weight + x)); + } + } + __m128i w1 = _mm_sub_epi16(mmEight, w0); + + __m128i s0tmp = _mm_unpacklo_epi16(s0, s1); + __m128i w0tmp = _mm_unpacklo_epi16(w0, w1); + s0tmp = _mm_add_epi32(_mm_madd_epi16(s0tmp, w0tmp), mmOffset); + s0tmp = _mm_sra_epi32(s0tmp, mmShift); + + s0 = _mm_unpackhi_epi16(s0, s1); + w0 = _mm_unpackhi_epi16(w0, w1); + s0 = _mm_add_epi32(_mm_madd_epi16(s0, w0), mmOffset); + s0 = _mm_sra_epi32(s0, mmShift); + + s0 = _mm_packs_epi32(s0tmp, s0); + s0 = _mm_min_epi16(mmMax, _mm_max_epi16(s0, mmMin)); + _mm_storeu_si128((__m128i *) (dst + x), s0); + } + dst += strideDst; + src0 += strideSrc0; + src1 += strideSrc1; + weight += stepY; + } + } +} +#endif + template< X86_VEXT vext > #if JVET_AA0058_GPM_ADP_BLD void xWeightedGeoBlk_SSE(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const uint8_t splitDir, const uint8_t bldIdx, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1) @@ -4243,6 +4954,11 @@ void InterpolationFilter::_initInterpolationFilterX86() #if JVET_Z0056_GPM_SPLIT_MODE_REORDERING m_weightedGeoTplA = xWeightedGeoTpl_SSE<vext, 
true>; #endif +#if JVET_AB0155_SGPM + m_weightedSgpm = xWeightedSgpm_SSE<vext>; + m_sadTM = xSadTM_SSE<vext>; + m_sgpmSadTM = xSgpmSadTM_SSE<vext>; +#endif } template void InterpolationFilter::_initInterpolationFilterX86<SIMDX86>(); diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index 7803dd7a0d4ce7f2b64f7464834f307894119bfe..ece1232db8bb2d65def6b358397ec7ae3b5b7d6d 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -1812,6 +1812,9 @@ void CABACReader::extend_ref_line(CodingUnit& cu) if ( !cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType) || cu.bdpcmMode #if ENABLE_DIMD || cu.dimd +#endif +#if JVET_AB0155_SGPM + || cu.sgpm #endif ) { @@ -1933,6 +1936,14 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu ) #if JVET_W0123_TIMD_FUSION cu_timd_flag(cu); #endif +#if JVET_AB0155_SGPM + sgpm_flag(cu); + if (cu.sgpm) + { + return; + } +#endif + extend_ref_line( cu ); isp_mode( cu ); #if ENABLE_DIMD @@ -2132,6 +2143,50 @@ void CABACReader::cu_timd_flag( CodingUnit& cu ) } #endif +#if JVET_AB0155_SGPM +void CABACReader::sgpm_flag(CodingUnit &cu) +{ + if (!cu.cs->sps->getUseSgpm()) + { + cu.sgpm = false; + return; + } + if (!(cu.lwidth() >= GEO_MIN_CU_SIZE_EX && cu.lheight() >= GEO_MIN_CU_SIZE_EX && cu.lwidth() <= GEO_MAX_CU_SIZE_EX + && cu.lheight() <= GEO_MAX_CU_SIZE_EX && cu.lwidth() < 8 * cu.lheight() && cu.lheight() < 8 * cu.lwidth() + && cu.lwidth() * cu.lheight() >= SGPM_MIN_PIX)) + { + cu.sgpm = false; + return; + } + + if (cu.dimd || cu.timd || cu.mipFlag || cu.tmpFlag) + { + cu.sgpm = false; + return; + } + if (!cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType)) + { + cu.sgpm = false; + return; + } + if (!(cu.lx() && cu.ly())) + { + cu.sgpm = false; + return; + } + + unsigned ctxId = DeriveCtx::CtxSgpmFlag(cu); + cu.sgpm = m_BinDecoder.decodeBin(Ctx::SgpmFlag(ctxId)); + + if (cu.sgpm) + { + uint32_t sgpmIdx = 0; + 
xReadTruncBinCode(sgpmIdx, SGPM_NUM); + cu.sgpmIdx = sgpmIdx; + } +} +#endif + void CABACReader::intra_chroma_pred_modes( CodingUnit& cu ) { #if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS @@ -5946,6 +6001,9 @@ void CABACReader::isp_mode( CodingUnit& cu ) if( !CU::isIntra( cu ) || !isLuma( cu.chType ) || cu.firstPU->multiRefIdx || !cu.cs->sps->getUseISP() || cu.bdpcmMode || !CU::canUseISP( cu, getFirstComponentOfChannel( cu.chType ) ) || cu.colorTransform #if ENABLE_DIMD && JVET_V0087_DIMD_NO_ISP || cu.dimd +#endif +#if JVET_AB0155_SGPM + || cu.sgpm #endif ) { diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h index 1f5b452f32fac785920aa8f91ce02836a6c9ab3b..581e8ee449aee73f418566d04a7bd44bf8bf22ff 100644 --- a/source/Lib/DecoderLib/CABACReader.h +++ b/source/Lib/DecoderLib/CABACReader.h @@ -115,6 +115,9 @@ public: void intra_luma_pred_modes ( CodingUnit& cu ); #if JVET_W0123_TIMD_FUSION void cu_timd_flag ( CodingUnit& cu ); +#endif +#if JVET_AB0155_SGPM + void sgpm_flag ( CodingUnit& cu ); #endif void intra_chroma_pred_modes ( CodingUnit& cu ); bool intra_chroma_lmc_mode ( PredictionUnit& pu ); diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp index 8e18da77bd600f5adaa53060ff46ebe81b378e8d..ab4a4e139ce3c72af3b084462760df496acd9b93 100644 --- a/source/Lib/DecoderLib/DecCu.cpp +++ b/source/Lib/DecoderLib/DecCu.cpp @@ -322,6 +322,35 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea ) pu->intraDir[0] = currCU.timdMode; } #endif + +#if JVET_AB0155_SGPM + else if (currCU.sgpm) + { + PredictionUnit *pu = currCU.firstPU; + const CompArea &area = currCU.Y(); +#if SECONDARY_MPM + IntraPrediction::deriveDimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU); +#endif + static_vector<SgpmInfo, SGPM_NUM> sgpmInfoList; + static_vector<double, SGPM_NUM> sgpmCostList; + int sgpmIdx = currCU.sgpmIdx; + + if (currCU.lwidth() * currCU.lheight() <= 1024) + { + 
m_pcIntraPred->deriveTimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU, false, true); + } + + m_pcIntraPred->deriveSgpmModeOrdered(currCU.cs->picture->getRecoBuf(area), area, currCU, sgpmInfoList, sgpmCostList); + + currCU.sgpmSplitDir = sgpmInfoList[sgpmIdx].sgpmSplitDir; + currCU.sgpmMode0 = sgpmInfoList[sgpmIdx].sgpmMode0; + currCU.sgpmMode1 = sgpmInfoList[sgpmIdx].sgpmMode1; + + pu->intraDir[0] = currCU.sgpmMode0; + pu->intraDir1[0] = currCU.sgpmMode1; + } +#endif + else if (currCU.firstPU->parseLumaMode) { const CompArea &area = currCU.Y(); diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index ec6ed9279c29bd33902e20e10752590062cb4c95..845353c65786d5aec2bd34fe0706af49964546ce 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -2494,6 +2494,9 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS) #endif #if JVET_W0123_TIMD_FUSION READ_FLAG(uiCode, "sps_timd_enabled_flag"); pcSPS->setUseTimd( uiCode != 0 ); +#endif +#if JVET_AB0155_SGPM + READ_FLAG(uiCode, "sps_sgpm_enabled_flag"); pcSPS->setUseSgpm(uiCode != 0); #endif if( pcSPS->getChromaFormatIdc() != CHROMA_400) { @@ -5352,6 +5355,9 @@ void HLSyntaxReader::parseConstraintInfo(ConstraintInfo *cinfo) #endif #if JVET_W0123_TIMD_FUSION READ_FLAG(symbol, "gci_no_timd_constraint_flag"); cinfo->setNoTimdConstraintFlag(symbol > 0 ? true : false); +#endif +#if JVET_AB0155_SGPM + READ_FLAG(symbol, "gci_no_sgpm_constraint_flag"); cinfo->setNoSgpmConstraintFlag(symbol > 0 ? true : false); #endif /* inter */ READ_FLAG(symbol, "gci_no_ref_pic_resampling_constraint_flag"); cinfo->setNoRprConstraintFlag(symbol > 0 ? 
true : false); diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index c1f94280388c47a7d5e81a48859124e9043f1328..8ffe3b5959b617875f661647de15d4f321b79b2f 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -1278,6 +1278,9 @@ void CABACWriter::extend_ref_line(const PredictionUnit& pu) if( !cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma( cu.chType ) || cu.bdpcmMode #if ENABLE_DIMD || cu.dimd +#endif +#if JVET_AB0155_SGPM + || cu.sgpm #endif ) { @@ -1352,6 +1355,9 @@ void CABACWriter::extend_ref_line(const CodingUnit& cu) if ( !cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType) || cu.bdpcmMode #if ENABLE_DIMD || cu.dimd +#endif +#if JVET_AB0155_SGPM + || cu.sgpm #endif ) { @@ -1461,6 +1467,13 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu ) } #if JVET_W0123_TIMD_FUSION cu_timd_flag(cu); +#endif +#if JVET_AB0155_SGPM + sgpm_flag(cu); + if (cu.sgpm) + { + return; + } #endif extend_ref_line( cu ); @@ -1681,6 +1694,13 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu ) } #if JVET_W0123_TIMD_FUSION cu_timd_flag(*pu.cu); +#endif +#if JVET_AB0155_SGPM + sgpm_flag(*pu.cu); + if (pu.cu->sgpm) + { + return; + } #endif extend_ref_line( pu ); isp_mode( *pu.cu ); @@ -1846,6 +1866,43 @@ void CABACWriter::cu_timd_flag( const CodingUnit& cu ) } #endif +#if JVET_AB0155_SGPM +void CABACWriter::sgpm_flag(const CodingUnit &cu) +{ + if (!cu.cs->sps->getUseSgpm()) + { + return; + } + if (!(cu.lwidth() >= GEO_MIN_CU_SIZE_EX && cu.lheight() >= GEO_MIN_CU_SIZE_EX && cu.lwidth() <= GEO_MAX_CU_SIZE_EX + && cu.lheight() <= GEO_MAX_CU_SIZE_EX && cu.lwidth() < 8 * cu.lheight() && cu.lheight() < 8 * cu.lwidth() + && cu.lwidth() * cu.lheight() >= SGPM_MIN_PIX)) + { + return; + } + + if (cu.dimd || cu.timd || cu.mipFlag || cu.tmpFlag) + { + return; + } + if (!cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType)) + { + return; + } + if 
(!(cu.lx() && cu.ly())) + { + return; + } + + unsigned ctxId = DeriveCtx::CtxSgpmFlag(cu); + m_BinEncoder.encodeBin(cu.sgpm, Ctx::SgpmFlag(ctxId)); + + if (cu.sgpm) + { + xWriteTruncBinCode(cu.sgpmIdx, SGPM_NUM); + } +} +#endif + #if ENABLE_DIMD void CABACWriter::cu_dimd_flag(const CodingUnit& cu) { @@ -5754,6 +5811,9 @@ void CABACWriter::isp_mode( const CodingUnit& cu ) if( !CU::isIntra( cu ) || !isLuma( cu.chType ) || cu.firstPU->multiRefIdx || !cu.cs->sps->getUseISP() || cu.bdpcmMode || !CU::canUseISP( cu, getFirstComponentOfChannel( cu.chType ) ) || cu.colorTransform #if ENABLE_DIMD && JVET_V0087_DIMD_NO_ISP || cu.dimd +#endif +#if JVET_AB0155_SGPM + || cu.sgpm #endif ) { diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h index 09e5b24573169b537d54923b396f3bcc9940ac69..049f84d3cc236d46c8bba45ab69c389dcb107d00 100644 --- a/source/Lib/EncoderLib/CABACWriter.h +++ b/source/Lib/EncoderLib/CABACWriter.h @@ -128,6 +128,9 @@ public: #endif #if JVET_W0123_TIMD_FUSION void cu_timd_flag ( const CodingUnit& cu ); +#endif +#if JVET_AB0155_SGPM + void sgpm_flag (const CodingUnit& cu ); #endif void intra_chroma_pred_modes ( const CodingUnit& cu ); void intra_chroma_lmc_mode ( const PredictionUnit& pu ); diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index c5503db4efa1f016fa33e3711425e2d83c480d52..890e29d692df671fece66ed1d0819478f45edd52 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -252,6 +252,9 @@ protected: #if JVET_W0123_TIMD_FUSION bool m_noTimdConstraintFlag; #endif +#if JVET_AB0155_SGPM + bool m_noSgpmConstraintFlag; +#endif #if ENABLE_OBMC bool m_noObmcConstraintFlag; #endif @@ -464,6 +467,9 @@ protected: #if JVET_W0123_TIMD_FUSION bool m_timd; #endif +#if JVET_AB0155_SGPM + bool m_sgpm; +#endif #if ENABLE_OBMC bool m_OBMC; #endif @@ -1098,6 +1104,10 @@ public: bool getNoTimdConstraintFlag() const { return m_noTimdConstraintFlag; } void setNoTimdConstraintFlag(bool 
val) { m_noTimdConstraintFlag = val; } #endif +#if JVET_AB0155_SGPM + bool getNoSgpmConstraintFlag() const { return m_noSgpmConstraintFlag; } + void setNoSgpmConstraintFlag(bool val) { m_noSgpmConstraintFlag = val; } +#endif #if ENABLE_OBMC bool getNoObmcConstraintFlag() const { return m_noObmcConstraintFlag; } void setNoObmcConstraintFlag(bool bVal) { m_noObmcConstraintFlag = bVal; } @@ -1464,6 +1474,10 @@ public: void setUseTimd ( bool b ) { m_timd = b; } bool getUseTimd () const { return m_timd; } #endif +#if JVET_AB0155_SGPM + void setUseSgpm (bool b) { m_sgpm = b; } + bool getUseSgpm () const { return m_sgpm; } +#endif #if ENABLE_OBMC void setUseObmc ( bool b ) { m_OBMC = b; } bool getUseObmc () const { return m_OBMC; } diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index 38b32dc9ec467abb11263d2d48176e43237c328a..a4f547186d8acbd956c41b27728b2a751f3e6845 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -2238,6 +2238,10 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS bool timdIsBlended = false; int timdFusionWeight[2] = { 0 }; #endif +#if JVET_AB0155_SGPM + int timdHorMode = 0; + int timdVerMode = 0; +#endif double dct2Cost = MAX_DOUBLE; @@ -2439,13 +2443,22 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS if (!timdDerived) { const CompArea &area = cu.Y(); + +#if JVET_AB0155_SGPM + cu.timdMode = m_pcIntraSearch->deriveTimdMode(bestCS->picture->getRecoBuf(area), area, cu, true, true); +#else cu.timdMode = m_pcIntraSearch->deriveTimdMode(bestCS->picture->getRecoBuf(area), area, cu); +#endif timdMode = cu.timdMode; timdDerived = true; timdModeSecondary = cu.timdModeSecondary; timdIsBlended = cu.timdIsBlended; timdFusionWeight[0] = cu.timdFusionWeight[0]; timdFusionWeight[1] = cu.timdFusionWeight[1]; +#if JVET_AB0155_SGPM + timdHorMode = cu.timdHor; + timdVerMode = cu.timdVer; +#endif } else { @@ -2454,6 +2467,10 @@ bool 
EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS cu.timdIsBlended = timdIsBlended; cu.timdFusionWeight[0] = timdFusionWeight[0]; cu.timdFusionWeight[1] = timdFusionWeight[1]; +#if JVET_AB0155_SGPM + cu.timdHor = timdHorMode; + cu.timdVer = timdVerMode; +#endif } } #endif diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index 55cfebe1c5d87f6108b61af80eaa5ac25c2b52e4..06e97509250c3c4ea809ce99e78e450c4d3b921f 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -1453,6 +1453,9 @@ void EncLib::xInitSPS( SPS& sps ) #if JVET_W0123_TIMD_FUSION cinfo->setNoTimdConstraintFlag(m_noTimdConstraintFlag); #endif +#if JVET_AB0155_SGPM + cinfo->setNoSgpmConstraintFlag(m_noSgpmConstraintFlag); +#endif #if ENABLE_OBMC cinfo->setNoObmcConstraintFlag(m_noObmcConstraintFlag); #endif @@ -1641,6 +1644,9 @@ void EncLib::xInitSPS( SPS& sps ) #if JVET_W0123_TIMD_FUSION sps.setUseTimd ( m_timd ); #endif +#if JVET_AB0155_SGPM + sps.setUseSgpm ( m_sgpm ); +#endif #if ENABLE_OBMC sps.setUseOBMC ( m_OBMC ); #endif diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index 51d7f4ca7658483bdfd8dd81eebfd68efa02b775..00573527c4e28a88a41cb0af24babb49979c741e 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -74,6 +74,17 @@ IntraSearch::IntraSearch() { m_pSharedPredTransformSkip[ch] = nullptr; } +#if JVET_AB0155_SGPM + for (int i = 0; i < NUM_LUMA_MODE; i++) + { + m_intraPredBuf[i] = nullptr; + } + for (int i = 0; i < SGPM_NUM; i++) + { + m_sgpmPredBuf[i] = nullptr; + } +#endif + m_truncBinBits = nullptr; m_escapeNumBins = nullptr; m_minErrorIndexMap = nullptr; @@ -175,6 +186,21 @@ void IntraSearch::destroy() m_tmpStorageLCU.destroy(); m_colorTransResiBuf.destroy(); + +#if JVET_AB0155_SGPM + for (int i = 0; i < NUM_LUMA_MODE; i++) + { + delete[] m_intraPredBuf[i]; + m_intraPredBuf[i] = nullptr; + } + + for (int i = 0; i < 
SGPM_NUM; i++) + { + delete[] m_sgpmPredBuf[i]; + m_sgpmPredBuf[i] = nullptr; + } +#endif + m_isInitialized = false; if (m_truncBinBits != nullptr) { @@ -260,6 +286,16 @@ void IntraSearch::init( EncCfg* pcEncCfg, m_cccmStorage[cccmIdx].create(UnitArea(cform, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); } #endif +#if JVET_AB0155_SGPM + for (int i = 0; i < NUM_LUMA_MODE; i++) + { + m_intraPredBuf[i] = new Pel[GEO_MAX_CU_SIZE_EX * GEO_MAX_CU_SIZE_EX]; + } + for (int i = 0; i < SGPM_NUM; i++) + { + m_sgpmPredBuf[i] = new Pel[GEO_MAX_CU_SIZE_EX * GEO_MAX_CU_SIZE_EX]; + } +#endif for( uint32_t ch = 0; ch < MAX_NUM_TBLOCKS; ch++ ) { @@ -473,6 +509,10 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #if JVET_W0123_TIMD_FUSION const TempCtx ctxStartTimdFlag ( m_CtxCache, SubCtx( Ctx::TimdFlag, m_CABACEstimator->getCtx() ) ); #endif +#if JVET_AB0155_SGPM + const TempCtx ctxStartSgpmFlag(m_CtxCache, SubCtx(Ctx::SgpmFlag, m_CABACEstimator->getCtx())); +#endif + const TempCtx ctxStartIspMode ( m_CtxCache, SubCtx( Ctx::ISPMode, m_CABACEstimator->getCtx() ) ); #if SECONDARY_MPM const TempCtx ctxStartMPMIdxFlag(m_CtxCache, SubCtx(Ctx::IntraLumaMPMIdx, m_CABACEstimator->getCtx())); @@ -490,6 +530,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c bool LFNSTSaveFlag = sps.getUseLFNST() && cu.lfnstIdx == 0; LFNSTSaveFlag &= sps.getUseIntraMTS() ? 
cu.mtsFlag == 0 : true; +#if JVET_AB0155_SGPM + bool SGPMSaveFlag = LFNSTSaveFlag; +#endif const uint32_t lfnstIdx = cu.lfnstIdx; #if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS @@ -538,6 +581,15 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c bool ispCanBeUsed = sps.getUseISP() && cu.mtsFlag == 0 && cu.lfnstIdx == 0 && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize()); bool saveDataForISP = ispCanBeUsed && (!colorTransformIsEnabled || isFirstColorSpace); bool testISP = ispCanBeUsed && (!colorTransformIsEnabled || !cu.colorTransform); + +#if JVET_AB0155_SGPM + const bool sgpmAllowed = sps.getUseSgpm() && isLuma(partitioner.chType); + bool testSgpm = sgpmAllowed && cu.lwidth() >= GEO_MIN_CU_SIZE_EX && cu.lheight() >= GEO_MIN_CU_SIZE_EX + && cu.lwidth() <= GEO_MAX_CU_SIZE_EX && cu.lheight() <= GEO_MAX_CU_SIZE_EX + && cu.lwidth() < 8 * cu.lheight() && cu.lheight() < 8 * cu.lwidth() && cu.lx() && cu.ly() + && cu.lwidth() * cu.lheight() >= SGPM_MIN_PIX; +#endif + #if JVET_W0103_INTRA_MTS if (testISP && m_pcEncCfg->getUseFastISP()) { @@ -616,6 +668,12 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #if JVET_W0123_TIMD_FUSION bool bestTimdMode = false; #endif +#if JVET_AB0155_SGPM + bool bestSgpmMode = false; + const CompArea &area = pu.Y(); + CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); +#endif + if (isSecondColorSpace) { uiRdModeList.clear(); @@ -635,6 +693,15 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c { if (mtsUsageFlag != 2) { +#if JVET_AB0155_SGPM + if (testSgpm && SGPMSaveFlag) + { + for (int i = 0; i < NUM_LUMA_MODE; i++) + { + m_intraModeReady[i] = 0; + } + } +#endif // this should always be true CHECK(!pu.Y().valid(), "PU is not valid"); bool isFirstLineOfCtu = (((pu.block(COMPONENT_Y).y) & ((pu.cs->sps)->getMaxCUWidth() - 1)) == 0); @@ -678,8 +745,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner 
&partitioner, c if (numModesForFullRD != numModesAvailable) { CHECK(numModesForFullRD >= numModesAvailable, "Too many modes for full RD search"); - +#if !JVET_AB0155_SGPM const CompArea &area = pu.Y(); +#endif PelBuf piOrg = cs.getOrgBuf(area); PelBuf piPred = cs.getPredBuf(area); @@ -688,7 +756,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c DistParam distParamHad; if (cu.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()) { +#if !JVET_AB0155_SGPM CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); +#endif PelBuf tmpOrg = m_tmpStorageLCU.getBuf(tmpArea); tmpOrg.rspSignal( piOrg, m_pcReshape->getFwdLUT() ); m_pcRdCost->setDistParam(distParamSad, tmpOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, @@ -725,6 +795,25 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c const int numHadCand = (testMip ? 2 : 1) * 3; #endif +#if JVET_AB0155_SGPM + static_vector<SgpmInfo, SGPM_NUM> sgpmInfoList; + static_vector<double, SGPM_NUM> sgpmCostList; + int sgpmNeededMode[NUM_LUMA_MODE] = {0}; + + if (testSgpm && SGPMSaveFlag) + { + deriveSgpmModeOrdered(bestCS->picture->getRecoBuf(area), area, cu, sgpmInfoList, sgpmCostList); + for (int sgpmIdx = 0; sgpmIdx < SGPM_NUM; sgpmIdx++) + { + int sgpmMode[2]; + sgpmMode[0] = sgpmInfoList[sgpmIdx].sgpmMode0; + sgpmMode[1] = sgpmInfoList[sgpmIdx].sgpmMode1; + sgpmNeededMode[sgpmMode[0]] = 1; + sgpmNeededMode[sgpmMode[1]] = 1; + } + } +#endif + //*** Derive (regular) candidates using Hadamard cu.mipFlag = false; @@ -752,6 +841,15 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c initPredIntraParams(pu, pu.Y(), sps); predIntraAng(COMPONENT_Y, piPred, pu); +#if JVET_AB0155_SGPM + if (testSgpm && SGPMSaveFlag && sgpmNeededMode[uiMode]) + { + PelBuf predBuf(m_intraPredBuf[uiMode], tmpArea); + predBuf.copyFrom(piPred); + m_intraModeReady[uiMode] = 1; + } +#endif + // Use the min between SAD and HAD 
as the cost criterion // SAD is scaled by 2 to align with the scaling of HAD minSadHad += std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad)); @@ -764,6 +862,10 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #if JVET_W0123_TIMD_FUSION m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag ); #endif +#if JVET_AB0155_SGPM + m_CABACEstimator->getCtx() = SubCtx(Ctx::SgpmFlag, ctxStartSgpmFlag); +#endif + m_CABACEstimator->getCtx() = SubCtx( Ctx::ISPMode, ctxStartIspMode ); #if SECONDARY_MPM m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMPMIdx, ctxStartMPMIdxFlag); @@ -829,6 +931,14 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c initPredIntraParams(pu, pu.Y(), sps); predIntraAng(COMPONENT_Y, piPred, pu); +#if JVET_AB0155_SGPM + if (testSgpm && SGPMSaveFlag && sgpmNeededMode[mode]) + { + PelBuf predBuf(m_intraPredBuf[mode], tmpArea); + predBuf.copyFrom(piPred); + m_intraModeReady[mode] = 1; + } +#endif // Use the min between SAD and SATD as the cost criterion // SAD is scaled by 2 to align with the scaling of HAD @@ -843,6 +953,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag); #if JVET_W0123_TIMD_FUSION m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag ); +#endif +#if JVET_AB0155_SGPM + m_CABACEstimator->getCtx() = SubCtx(Ctx::SgpmFlag, ctxStartSgpmFlag); #endif m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode); #if SECONDARY_MPM @@ -914,6 +1027,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag); #if JVET_W0123_TIMD_FUSION m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag ); +#endif +#if JVET_AB0155_SGPM + m_CABACEstimator->getCtx() = SubCtx(Ctx::SgpmFlag, ctxStartSgpmFlag); #endif 
m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode); #if SECONDARY_MPM @@ -1097,6 +1213,118 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c CandHadList = m_dSavedHadListLFNST; } +#if JVET_AB0155_SGPM + if (testSgpm) + { + if (SGPMSaveFlag) + { + m_uiSavedRdModeListSGPM.clear(); + m_dSavedModeCostSGPM.clear(); + m_uiSavedHadModeListSGPM.clear(); + m_dSavedHadListSGPM.clear(); + + cu.tmpFlag = false; + pu.multiRefIdx = 0; + cu.mipFlag = false; + + initIntraPatternChType(cu, pu.Y(), true); + + // get single mode predictions + for (int sgpmIdx = 0; sgpmIdx < SGPM_NUM; sgpmIdx++) + { + int sgpmMode[2]; + sgpmMode[0] = sgpmInfoList[sgpmIdx].sgpmMode0; + sgpmMode[1] = sgpmInfoList[sgpmIdx].sgpmMode1; + for (int idxIn2 = 0; idxIn2 < 2; idxIn2++) + { + if (!m_intraModeReady[sgpmMode[idxIn2]]) + { + pu.intraDir[0] = sgpmMode[idxIn2]; + + initPredIntraParams(pu, pu.Y(), sps); + predIntraAng(COMPONENT_Y, piPred, pu); + + PelBuf predBuf(m_intraPredBuf[sgpmMode[idxIn2]], tmpArea); + predBuf.copyFrom(piPred); + m_intraModeReady[sgpmMode[idxIn2]] = 1; + } + } + } + + cu.sgpm = true; + // frac bits calculate once because all are the same + cu.sgpmIdx = 0; + cu.sgpmSplitDir = sgpmInfoList[0].sgpmSplitDir; + cu.sgpmMode0 = sgpmInfoList[0].sgpmMode0; + cu.sgpmMode1 = sgpmInfoList[0].sgpmMode1; + pu.intraDir[0] = cu.sgpmMode0; + pu.intraDir1[0] = cu.sgpmMode1; + + // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated. 
+#if JVET_V0130_INTRA_TMP + m_CABACEstimator->getCtx() = SubCtx(Ctx::TmpFlag, ctxStartTpmFlag); +#endif + m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag); +#if JVET_W0123_TIMD_FUSION + m_CABACEstimator->getCtx() = SubCtx(Ctx::TimdFlag, ctxStartTimdFlag); +#endif +#if JVET_AB0155_SGPM + m_CABACEstimator->getCtx() = SubCtx(Ctx::SgpmFlag, ctxStartSgpmFlag); +#endif + + m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode); +#if SECONDARY_MPM + m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMPMIdx, ctxStartMPMIdxFlag); +#endif + m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaPlanarFlag, ctxStartPlanarFlag); + m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode); +#if SECONDARY_MPM + m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaSecondMpmFlag, ctxStartIntraMode2); +#endif + m_CABACEstimator->getCtx() = SubCtx(Ctx::MultiRefLineIdx, ctxStartMrlIdx); + + uint64_t fracModeBits = xFracModeBitsIntra(pu, 0, CHANNEL_TYPE_LUMA); + + for (int sgpmIdx = 0; sgpmIdx < SGPM_NUM; sgpmIdx++) + { + int sgpmMode0 = sgpmInfoList[sgpmIdx].sgpmMode0; + int sgpmMode1 = sgpmInfoList[sgpmIdx].sgpmMode1; + PelBuf src0(m_intraPredBuf[sgpmMode0], tmpArea); + PelBuf src1(m_intraPredBuf[sgpmMode1], tmpArea); + + m_if.m_weightedSgpm(pu, width, height, COMPONENT_Y, sgpmInfoList[sgpmIdx].sgpmSplitDir, piPred, src0, src1); + + PelBuf predBuf(m_sgpmPredBuf[sgpmIdx], tmpArea); + predBuf.copyFrom(piPred); + + Distortion minSadHad = 0; + minSadHad += std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad)); + double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass; + + updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, SGPM_IDX, 0, 1, + sgpmInfoList[sgpmIdx].sgpmSplitDir, sgpmInfoList[sgpmIdx].sgpmMode0, + sgpmInfoList[sgpmIdx].sgpmMode1, sgpmIdx), + cost, m_uiSavedRdModeListSGPM, m_dSavedModeCostSGPM, SGPM_NUM); + updateCandList(ModeInfo(false, false, 0, 
NOT_INTRA_SUBPARTITIONS, SGPM_IDX, 0, 1, + sgpmInfoList[sgpmIdx].sgpmSplitDir, sgpmInfoList[sgpmIdx].sgpmMode0, + sgpmInfoList[sgpmIdx].sgpmMode1, sgpmIdx), + double(minSadHad), m_uiSavedHadModeListSGPM, m_dSavedHadListSGPM, SGPM_NUM); + } + + cu.sgpm = false; + } + + int updateNum = (numModesForFullRD + 1) / 2; + for (auto listIdx = 0; listIdx < updateNum; listIdx++) + { + updateCandList(m_uiSavedRdModeListSGPM[listIdx], m_dSavedModeCostSGPM[listIdx], uiRdModeList, + CandCostList, numModesForFullRD); + updateCandList(m_uiSavedHadModeListSGPM[listIdx], m_dSavedHadListSGPM[listIdx], uiHadModeList, + CandHadList, numHadCand); + } + } +#endif + if (m_pcEncCfg->getFastUDIUseMPMEnabled()) { @@ -1290,6 +1518,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag); #if JVET_W0123_TIMD_FUSION m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag ); +#endif +#if JVET_AB0155_SGPM + m_CABACEstimator->getCtx() = SubCtx(Ctx::SgpmFlag, ctxStartSgpmFlag); #endif m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode); #if SECONDARY_MPM @@ -1494,6 +1725,18 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c cu.dimd = true; } #endif +#if JVET_AB0155_SGPM + cu.sgpm = uiOrgMode.sgpmFlag; + if (cu.sgpm) + { + uiOrgMode.modeId = uiOrgMode.sgpmMode0; + cu.sgpmSplitDir = uiOrgMode.sgpmSplitDir; + cu.sgpmMode0 = uiOrgMode.sgpmMode0; + cu.sgpmMode1 = uiOrgMode.sgpmMode1; + cu.sgpmIdx = uiOrgMode.sgpmIdx; + pu.intraDir1[CHANNEL_TYPE_LUMA] = uiOrgMode.sgpmMode1; + } +#endif #if JVET_V0130_INTRA_TMP cu.tmpFlag = uiOrgMode.tmpFlag; #if JVET_W0103_INTRA_MTS @@ -1557,6 +1800,16 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c CHECK( cu.tmpFlag && cu.ispMode, "Error: combination of TPM and ISP not supported" ); CHECK( cu.tmpFlag && pu.multiRefIdx, "Error: combination of TPM and MRL not supported" ); #endif 
+#if JVET_AB0155_SGPM + CHECK(cu.sgpm && cu.tmpFlag, "Error: combination of SGPM and TPM not supported"); + CHECK(cu.sgpm && cu.ispMode, "Error: combination of SGPM and ISP not supported"); + CHECK(cu.sgpm && pu.multiRefIdx, "Error: combination of SGPM and MRL not supported"); + CHECK(cu.sgpm && cu.mipFlag, "Error: combination of SGPM and MIP not supported"); + CHECK(cu.sgpm && cu.timd, "Error: combination of SGPM and TIMD not supported"); + CHECK(cu.sgpm && cu.dimd, "Error: combination of SGPM and DIMD not supported"); + CHECK(cu.sgpm && cu.bdpcmMode, "Error: combination of SGPM and BDPCM not supported"); +#endif + #if ENABLE_DIMD && JVET_V0087_DIMD_NO_ISP CHECK(cu.ispMode && cu.dimd, "Error: combination of ISP and DIMD not supported"); #endif @@ -1645,7 +1898,11 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #endif #if JVET_V0130_INTRA_TMP #if JVET_W0123_TIMD_FUSION - if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && !cu.tmpFlag && testISP && !cu.timd) + if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && !cu.tmpFlag && testISP && !cu.timd +#if JVET_AB0155_SGPM + && !cu.sgpm +#endif + ) #else if( !cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && !cu.tmpFlag && testISP ) #endif @@ -1741,6 +1998,10 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c #if JVET_W0123_TIMD_FUSION bestTimdMode = cu.timd; #endif +#if JVET_AB0155_SGPM + bestSgpmMode = cu.sgpm; +#endif + if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode ) { m_bestModeCostStore[ lfnstIdx ] = csBest->cost; //cs.cost; @@ -1864,6 +2125,20 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c pu.intraDir[ CHANNEL_TYPE_LUMA ] = cu.timdMode; } #endif +#if JVET_AB0155_SGPM + cu.sgpm = uiBestPUMode.sgpmFlag; + if (cu.sgpm) + { + CHECK(!bestSgpmMode, "mode not 
same"); + pu.intraDir[CHANNEL_TYPE_LUMA] = uiBestPUMode.sgpmMode0; + pu.intraDir1[CHANNEL_TYPE_LUMA] = uiBestPUMode.sgpmMode1; + cu.sgpmSplitDir = uiBestPUMode.sgpmSplitDir; + cu.sgpmMode0 = uiBestPUMode.sgpmMode0; + cu.sgpmMode1 = uiBestPUMode.sgpmMode1; + cu.sgpmIdx = uiBestPUMode.sgpmIdx; + } +#endif + if (cu.colorTransform) { CHECK(pu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "chroma should use DM mode for adaptive color transform"); @@ -4664,6 +4939,16 @@ void IntraSearch::xSelectAMTForFullRD(TransformUnit &tu) } else { +#if JVET_AB0155_SGPM + if (pu.cu->sgpm) + { + CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); + PelBuf predBuf(m_sgpmPredBuf[pu.cu->sgpmIdx], tmpArea); + piPred.copyFrom(predBuf); + } + else +#endif + predIntraAng(COMPONENT_Y, piPred, pu); } @@ -4863,6 +5148,16 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp } else { +#if JVET_AB0155_SGPM + if (pu.cu->sgpm &&compID == COMPONENT_Y) + { + CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size()); + PelBuf predBuf(m_sgpmPredBuf[pu.cu->sgpmIdx], tmpArea); + piPred.copyFrom(predBuf); + } + else +#endif + predIntraAng(compID, piPred, pu); } #if JVET_Z0050_DIMD_CHROMA_FUSION @@ -7904,8 +8199,12 @@ void IntraSearch::reduceHadCandList(static_vector<T, N>& candModeList, static_ve if (!alreadyIncluded) { +#if JVET_AB0155_SGPM + updateCandList(mipMode, sortedMipCost[idx], tempRdModeList, tempCandCostList, tempRdModeList.size() + 1); +#else tempRdModeList.push_back(mipMode); tempCandCostList.push_back(0); +#endif if( fastMip ) break; } } diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h index 93795e081e7d8064f26c64b6a0be311cd371ea3f..49faf5dfe7623dd03fcb1c4780e2544ccf688665 100644 --- a/source/Lib/EncoderLib/IntraSearch.h +++ b/source/Lib/EncoderLib/IntraSearch.h @@ -227,7 +227,53 @@ private: uint32_t modeId; // PU::intraDir[CHANNEL_TYPE_LUMA] #if JVET_V0130_INTRA_TMP bool 
tmpFlag; // CU::tmpFlag - +#endif +#if JVET_AB0155_SGPM + bool sgpmFlag; // CU::sgpmFlag + int sgpmSplitDir; + int sgpmMode0; + int sgpmMode1; + int sgpmIdx; +#endif +#if JVET_AB0155_SGPM && JVET_V0130_INTRA_TMP + ModeInfo() : mipFlg(false), mipTrFlg(false), mRefId(0), ispMod(NOT_INTRA_SUBPARTITIONS), modeId(0), tmpFlag(0), sgpmFlag(0), sgpmSplitDir(0), sgpmMode0(0), sgpmMode1(0), sgpmIdx(0){} + ModeInfo(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode, + const bool tpmf = 0, const bool sf = 0, const int sd = 0, const int sm0 = 0, const int sm1 = 0, const int si = 0) + : mipFlg(mipf) + , mipTrFlg(miptf) + , mRefId(mrid) + , ispMod(ispm) + , modeId(mode) + , tmpFlag(tpmf) + , sgpmFlag(sf) + , sgpmSplitDir(sd) + , sgpmMode0(sm0) + , sgpmMode1(sm1) + , sgpmIdx(si) + { + } + ModeInfo &operator=(const ModeInfo &other) + { + mipFlg = other.mipFlg; // CU::mipFlag + mipTrFlg = other.mipTrFlg; // PU::mipTransposedFlag + mRefId = other.mRefId; // PU::multiRefIdx + ispMod = other.ispMod; // CU::ispMode + modeId = other.modeId; // PU::intraDir[CHANNEL_TYPE_LUMA] + tmpFlag = other.tmpFlag; // CU::tmpFlag + sgpmFlag = other.sgpmFlag; // CU::sgpmFlag + sgpmSplitDir = other.sgpmSplitDir; + sgpmMode0 = other.sgpmMode0; + sgpmMode1 = other.sgpmMode1; + sgpmIdx = other.sgpmIdx; + return *this; + } + bool operator==(const ModeInfo cmp) const + { + return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod + && modeId == cmp.modeId && tmpFlag == cmp.tmpFlag && sgpmFlag == cmp.sgpmFlag + && sgpmSplitDir == cmp.sgpmSplitDir); // sgpmMode0 and sgpmMode1 seems no need + } +#elif JVET_V0130_INTRA_TMP ModeInfo() : mipFlg(false), mipTrFlg(false), mRefId(0), ispMod(NOT_INTRA_SUBPARTITIONS), modeId(0), tmpFlag(0) {} ModeInfo(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode, const bool tpmf = 0) : mipFlg(mipf), mipTrFlg(miptf), mRefId(mrid), ispMod(ispm), 
modeId(mode), tmpFlag(tpmf) {} bool operator==(const ModeInfo cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId && tmpFlag == cmp.tmpFlag); } @@ -241,7 +287,19 @@ private: { double rdCost; ModeInfoWithCost() : ModeInfo(), rdCost(MAX_DOUBLE) {} -#if JVET_V0130_INTRA_TMP +#if JVET_AB0155_SGPM && JVET_V0130_INTRA_TMP + ModeInfoWithCost(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode, + const bool tpmf, double cost, const bool sf = 0, const int sd = 0, const int sm0 = 0, const int sm1 = 0) + : ModeInfo(mipf, miptf, mrid, ispm, mode, tpmf, sf, sd, sm0, sm1), rdCost(cost) + { + } + bool operator==(const ModeInfoWithCost cmp) const + { + return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod + && modeId == cmp.modeId && tmpFlag == cmp.tmpFlag && rdCost == cmp.rdCost && sgpmFlag == cmp.sgpmFlag + && sgpmSplitDir == cmp.sgpmSplitDir); // sgpmMode0 and sgpmMode1 seems no need + } +#elif JVET_V0130_INTRA_TMP ModeInfoWithCost(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode, const bool tpmf, double cost) : ModeInfo(mipf, miptf, mrid, ispm, mode, tpmf), rdCost(cost) {} bool operator==(const ModeInfoWithCost cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId && tmpFlag == cmp.tmpFlag && rdCost == cmp.rdCost); } #else @@ -403,6 +461,19 @@ private: static_vector<double, FAST_UDI_MAX_RDMODE_NUM> m_dSavedModeCostLFNST; static_vector<double, FAST_UDI_MAX_RDMODE_NUM> m_dSavedHadListLFNST; +#if JVET_AB0155_SGPM + static_vector<ModeInfo, SGPM_NUM> m_uiSavedRdModeListSGPM; + static_vector<ModeInfo, SGPM_NUM> m_uiSavedHadModeListSGPM; + static_vector<double, SGPM_NUM> m_dSavedModeCostSGPM; + static_vector<double, SGPM_NUM> m_dSavedHadListSGPM; + + Pel* 
m_intraPredBuf[NUM_LUMA_MODE]; + Pel* m_sgpmPredBuf[SGPM_NUM]; + uint8_t m_intraModeReady[NUM_LUMA_MODE]; + + size_t m_numSGPMCands; +#endif + PelStorage m_tmpStorageLCU; PelStorage m_colorTransResiBuf; #if JVET_AB0143_CCCM_TS diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp index 7f6f217e580919818ddeba4d3e563159a24a2934..a5a139542f6861d8eb962330198a1e16614c504a 100644 --- a/source/Lib/EncoderLib/VLCWriter.cpp +++ b/source/Lib/EncoderLib/VLCWriter.cpp @@ -1566,6 +1566,9 @@ void HLSWriter::codeSPS( const SPS* pcSPS ) #if JVET_W0123_TIMD_FUSION WRITE_FLAG( pcSPS->getUseTimd() ? 1 : 0, "sps_timd_enabled_flag"); +#endif +#if JVET_AB0155_SGPM + WRITE_FLAG(pcSPS->getUseSgpm() ? 1 : 0, "sps_sgpm_enabled_flag"); #endif if( pcSPS->getChromaFormatIdc() != CHROMA_400) { @@ -3230,6 +3233,9 @@ void HLSWriter::codeConstraintInfo ( const ConstraintInfo* cinfo ) #if JVET_W0123_TIMD_FUSION WRITE_FLAG(cinfo->getNoTimdConstraintFlag() ? 1 : 0, "gci_no_timd_constraint_flag" ); #endif +#if JVET_AB0155_SGPM + WRITE_FLAG(cinfo->getNoSgpmConstraintFlag() ? 1 : 0, "gci_no_sgpm_constraint_flag"); +#endif /* inter */ WRITE_FLAG(cinfo->getNoRprConstraintFlag() ? 1 : 0, "gci_no_ref_pic_resampling_constraint_flag");