diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index 115a5dcb6044553dfc85c36bf24c81889b080d3b..5b73a47645f03f637f56b6429f249eeb2ef6dc4b 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -11437,13 +11437,21 @@ void InterPrediction::sortIbcAdaptiveMergeMbvdCandidates(PredictionUnit &pu, Me if (m_bAMLTemplateAvailabe[0]) { +#if JVET_AJ0096_SATD_REORDER_INTRA + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } if (uiCost < m_mbvdCandCostList[endEncIdx - 1] && m_bAMLTemplateAvailabe[1]) { +#if JVET_AJ0096_SATD_REORDER_INTRA + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } @@ -11591,14 +11599,22 @@ void InterPrediction::sortIbcMergeMbvdCandidates(PredictionUnit &pu, MergeCtx& if (m_bAMLTemplateAvailabe[0]) { +#if JVET_AJ0096_SATD_REORDER_INTRA + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { +#if JVET_AJ0096_SATD_REORDER_INTRA + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), 
COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } @@ -12519,14 +12535,22 @@ void InterPrediction::sortInterMergeMMVDCandidates(PredictionUnit &pu, MergeCtx } if (m_bAMLTemplateAvailabe[0]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } @@ -17191,14 +17215,22 @@ void InterPrediction::adjustAffineMergeCandidates(PredictionUnit &pu, AffineMer if (m_bAMLTemplateAvailabe[0]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), 
pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } @@ -17673,13 +17705,21 @@ void InterPrediction::adjustAffineMergeCandidates(PredictionUnit &pu, AffineMerg if (m_bAMLTemplateAvailabe[0]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } @@ -17780,13 +17820,21 @@ void InterPrediction::adjustAffineMergeCandidates(PredictionUnit &pu, AffineMerg ); if (m_bAMLTemplateAvailabe[0]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost 
+= cDistParam.distFunc(cDistParam); } @@ -17885,13 +17933,21 @@ void InterPrediction::adjustAffineMergeCandidates(PredictionUnit &pu, AffineMerg ); if (m_bAMLTemplateAvailabe[0]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } @@ -19641,14 +19697,22 @@ void InterPrediction::adjustIBCMergeCandidates(PredictionUnit &pu, MergeCtx& mr if (m_bAMLTemplateAvailabe[0]) { +#if JVET_AJ0096_SATD_REORDER_INTRA + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop, pcBufPredRefTop, pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop, pcBufPredRefTop, pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { +#if JVET_AJ0096_SATD_REORDER_INTRA + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft, pcBufPredRefLeft, pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft, pcBufPredRefLeft, pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } @@ -19825,14 +19889,22 @@ void InterPrediction::adjustIBCMergeCandidates(PredictionUnit &pu, 
MergeCtx& mr if (m_bAMLTemplateAvailabe[0]) { +#if JVET_AJ0096_SATD_REORDER_INTRA + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop, pcBufPredRefTop, pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop, pcBufPredRefTop, pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { +#if JVET_AJ0096_SATD_REORDER_INTRA + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft, pcBufPredRefLeft, pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft, pcBufPredRefLeft, pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } @@ -19950,14 +20022,22 @@ void InterPrediction::adjustAffineMergeCandidatesOneGroup(PredictionUnit &pu, A ); if (m_bAMLTemplateAvailabe[0]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } @@ -20140,7 +20220,11 @@ Distortion InterPrediction::getTempCost(const PredictionUnit &pu, const PelBuf & Distortion uiCost; DistParam cDistParam; cDistParam.applyWeight = false; +#if JVET_AJ0096_SATD_REORDER_INTRA + 
m_pcRdCost->setDistParam(cDistParam, cur, org, pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, cur, org, pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost = cDistParam.distFunc(cDistParam); return uiCost; } @@ -31955,7 +32039,11 @@ void InterPrediction::deriveAffineMVDCandVecFromMotionInforPred(const Prediction const bool res = getAffAMLRefTemplateMvdPredUni<1>(tmpPU, pcBufPredRefTop, pcBufPredRefLeft, pu.cs->sps->getUseFastSubTmvp(), tmp); if (res) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } else @@ -31994,7 +32082,11 @@ void InterPrediction::deriveAffineMVDCandVecFromMotionInforPred(const Prediction const bool res = getAffAMLRefTemplateMvdPredUni<2>(tmpPU, pcBufPredRefTop, pcBufPredRefLeft, pu.cs->sps->getUseFastSubTmvp(), tmp); if (res) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } else @@ -32270,7 +32362,11 @@ void InterPrediction::reorderRefCombList(PredictionUnit &pu, std::vector<RefList } if (res) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), 
pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } else @@ -32289,7 +32385,11 @@ void InterPrediction::reorderRefCombList(PredictionUnit &pu, std::vector<RefList } if (res) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } else @@ -32426,14 +32526,22 @@ void InterPrediction::reorderRefCombList(PredictionUnit &pu, std::vector<RefList { if (m_bAMLTemplateAvailabe[0]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } @@ -33351,9 +33459,17 @@ void InterPrediction::reorderRefPairList(PredictionUnit &pu, std::vector<RefPicP if (m_bAMLTemplateAvailabe[0]) { #if JVET_AD0140_MVD_PREDICTION +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), ((identicalMotion && !tmpPU.cu->licFlag)? 
pcBufPredRefTopIdMotion:pcBufPredRefTop).Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), ((identicalMotion && !tmpPU.cu->licFlag)? pcBufPredRefTopIdMotion:pcBufPredRefTop).Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif +#else +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); #else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif #endif uiCost += cDistParam.distFunc(cDistParam); @@ -33362,9 +33478,17 @@ void InterPrediction::reorderRefPairList(PredictionUnit &pu, std::vector<RefPicP if (m_bAMLTemplateAvailabe[1]) { #if JVET_AD0140_MVD_PREDICTION +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), ((identicalMotion && !tmpPU.cu->licFlag) ? pcBufPredRefLeftIdMotion : pcBufPredRefLeft).Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), ((identicalMotion && !tmpPU.cu->licFlag) ? 
pcBufPredRefLeftIdMotion : pcBufPredRefLeft).Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif +#else +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); #else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif #endif uiCost += cDistParam.distFunc(cDistParam); @@ -33721,7 +33845,11 @@ void InterPrediction::reorderRefPairList(PredictionUnit &pu, std::vector<RefPicP } #endif +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } @@ -33774,7 +33902,11 @@ void InterPrediction::reorderRefPairList(PredictionUnit &pu, std::vector<RefPicP } #endif +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } #if JVET_AD0140_MVD_PREDICTION @@ -35666,14 +35798,22 @@ void InterPrediction::defineSignHypMatchAffine(PredictionUnit& pu, const RefPicL uiCost = 0; if (m_bAMLTemplateAvailabe[0]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), 
pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } if (m_bAMLTemplateAvailabe[1]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } @@ -36463,12 +36603,20 @@ void InterPrediction::defineSignHypMatchAffine(PredictionUnit& pu, const RefPicL if (numTemplate[0]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurTop.Y(), pcBufPredRefTop.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } if (numTemplate[1]) { +#if JVET_AJ0096_SATD_REORDER_INTER + m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, true); +#else m_pcRdCost->setDistParam(cDistParam, pcBufPredCurLeft.Y(), pcBufPredRefLeft.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } #endif diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp index f800a2da54e3c2fed3f3427f0e4fdfa19a5f0b62..e899340f1f517f4ec0eb17863dae9c613be2fbc6 100644 --- a/source/Lib/CommonLib/IntraPrediction.cpp +++ b/source/Lib/CommonLib/IntraPrediction.cpp @@ -3025,13 +3025,21 @@ Mv IntraPrediction::refineChromaBv(const ComponentID compId, const PredictionUni if (topCanUse) { PelBuf tempRef = PelBuf(refPix + 1, uiWidth, Size(uiWidth, DBV_TEMPLATE_SIZE)); +#if JVET_AJ0096_SATD_REORDER_INTRA + 
m_dbvSadCost->setDistParam(cDistParam, tempCurTop, tempRef, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), compId, uiWidth >= 4 && uiHeight >= 4 ? true : false); +#else m_dbvSadCost->setDistParam(cDistParam, tempCurTop, tempRef, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), compId, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } if (leftCanUse) { PelBuf tempRef = PelBuf(refPix + 1 + stride, uiHeight, Size(uiHeight, DBV_TEMPLATE_SIZE)); +#if JVET_AJ0096_SATD_REORDER_INTRA + m_dbvSadCost->setDistParam(cDistParam, tempCurLeft, tempRef, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), compId, uiWidth >= 4 && uiHeight >= 4 ? true : false); +#else m_dbvSadCost->setDistParam(cDistParam, tempCurLeft, tempRef, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), compId, false); +#endif uiCost += cDistParam.distFunc(cDistParam); } aBvCostVec.push_back(std::pair<Mv, Distortion>(*it, uiCost)); @@ -4793,7 +4801,11 @@ void IntraPrediction::geneChromaFusionPred(const ComponentID compId, PelBuf &piP if (numSample > 0) { int bestSAD = cccmSAD < cclmSAD ? 
cccmSAD : cclmSAD; +#if JVET_AJ0096_SATD_REORDER_INTRA + if (bestSAD > 144 * numSample) +#else if (bestSAD > 64 * numSample) +#endif { w0 = 3; w1 = 1; @@ -7906,21 +7918,39 @@ void IntraPrediction::deriveMPMSorted(const PredictionUnit& pu, uint8_t* mpm, in distParamSad[1].useMR = false; if (eTempType == LEFT_ABOVE_NEIGHBOR) { +#if JVET_AJ0096_SATD_REORDER_INTRA + m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg + iTempWidth, piPred + iTempWidth, iOrgStride, + uiPredStride, channelBitDepth, COMPONENT_Y, uiWidth, iTempHeight, 0, 1, true); + m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg + iTempHeight * iOrgStride, + piPred + iTempHeight * uiPredStride, iOrgStride, uiPredStride, channelBitDepth, + COMPONENT_Y, iTempWidth, uiHeight, 0, 1, true); +#else m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg + iTempWidth, piPred + iTempWidth, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, uiWidth, iTempHeight, 0, 1, false); // Use HAD (SATD) cost m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg + iTempHeight * iOrgStride, piPred + iTempHeight * uiPredStride, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, iTempWidth, uiHeight, 0, 1, false); +#endif } else if (eTempType == LEFT_NEIGHBOR) { +#if JVET_AJ0096_SATD_REORDER_INTRA + m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth, + COMPONENT_Y, iTempWidth, uiHeight, 0, 1, true); +#else m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, iTempWidth, uiHeight, 0, 1, false); +#endif } else if (eTempType == ABOVE_NEIGHBOR) { +#if JVET_AJ0096_SATD_REORDER_INTRA + m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth, + COMPONENT_Y, uiWidth, iTempHeight, 0, 1, true); +#else m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, uiWidth, iTempHeight, 
0, 1, false); +#endif } initTimdIntraPatternLuma(*pu.cu, area, eTempType != ABOVE_NEIGHBOR ? iTempWidth : 0, eTempType != LEFT_NEIGHBOR ? iTempHeight : 0, uiRefWidth, uiRefHeight); @@ -21746,6 +21776,19 @@ uint32_t IntraPrediction::xCalculateCCLMcost(const PredictionUnit &pu, const Com const SizeType cWidth = chromaArea.width; const SizeType cHeight = chromaArea.height; +#if JVET_AJ0096_SATD_REORDER_INTRA + const ChannelType chType = toChannelType(compID); + DistParam cDistParam; + cDistParam.applyWeight = false; + static Pel predChromaA[MAX_CU_SIZE]; + static Pel predChromaL[MAX_CU_SIZE]; + PelBuf predTop(predChromaA, cWidth, 1); + PelBuf predLeft(predChromaL, 1, cHeight); + static Pel reconChromaA[MAX_CU_SIZE]; + static Pel reconChromaL[MAX_CU_SIZE]; + PelBuf reconTop(reconChromaA, cWidth, 1); + PelBuf reconLeft(reconChromaL, 1, cHeight); +#endif CodingStructure &cs = *(pu.cs); const CodingUnit &cu = *(pu.cu); @@ -21780,8 +21823,17 @@ uint32_t IntraPrediction::xCalculateCCLMcost(const PredictionUnit &pu, const Com { predChroma = ClipPel(rightShift(cclmModel.a2 * src[pos], cclmModel.shift2) + cclmModel.b2, pu.cs->slice->clpRng(compID)); } +#if JVET_AJ0096_SATD_REORDER_INTRA + predTop.at(pos, 0) = predChroma; + reconTop.at(pos, 0) = curChroma0[pos]; +#else totalSAD += abs(predChroma - curChroma0[pos]); +#endif } +#if JVET_AJ0096_SATD_REORDER_INTRA + m_dbvSadCost->setDistParam(cDistParam, predTop, reconTop, pu.cs->sps->getBitDepth(chType), compID, cWidth >= 4 && cHeight >= 4 ? 
true : false); + totalSAD += (int)cDistParam.distFunc(cDistParam); +#endif } else #endif @@ -21789,8 +21841,17 @@ uint32_t IntraPrediction::xCalculateCCLMcost(const PredictionUnit &pu, const Com for (int pos = 0; pos < cWidth; pos++) { Pel predChroma = ClipPel(rightShift(cclmModel.a * src[pos], cclmModel.shift) + cclmModel.b, pu.cs->slice->clpRng(compID)); +#if JVET_AJ0096_SATD_REORDER_INTRA + predTop.at(pos, 0) = predChroma; + reconTop.at(pos, 0) = curChroma0[pos]; +#else totalSAD += abs(predChroma - curChroma0[pos]); +#endif } +#if JVET_AJ0096_SATD_REORDER_INTRA + m_dbvSadCost->setDistParam(cDistParam, predTop, reconTop, pu.cs->sps->getBitDepth(chType), compID, cWidth >= 4 && cHeight >= 4 ? true : false); + totalSAD += (int)cDistParam.distFunc(cDistParam); +#endif } } @@ -21814,8 +21875,17 @@ uint32_t IntraPrediction::xCalculateCCLMcost(const PredictionUnit &pu, const Com { predChroma = ClipPel(rightShift(cclmModel.a2 * src[pos * srcStride], cclmModel.shift2) + cclmModel.b2, pu.cs->slice->clpRng(compID)); } +#if JVET_AJ0096_SATD_REORDER_INTRA + predLeft.at(0, pos) = predChroma; + reconLeft.at(0, pos) = curChroma0[pos * curStride]; +#else totalSAD += abs(predChroma - curChroma0[pos * curStride]); +#endif } +#if JVET_AJ0096_SATD_REORDER_INTRA + m_dbvSadCost->setDistParam(cDistParam, predLeft, reconLeft, pu.cs->sps->getBitDepth(chType), compID, cWidth >= 4 && cHeight >= 4 ? 
true : false); + totalSAD += (int)cDistParam.distFunc(cDistParam); +#endif } else #endif @@ -21823,8 +21893,17 @@ uint32_t IntraPrediction::xCalculateCCLMcost(const PredictionUnit &pu, const Com for (int pos = 0; pos < cHeight; pos++) { Pel predChroma = ClipPel(rightShift(cclmModel.a * src[pos * srcStride], cclmModel.shift) + cclmModel.b, pu.cs->slice->clpRng(compID)); +#if JVET_AJ0096_SATD_REORDER_INTRA + predLeft.at(0, pos) = predChroma; + reconLeft.at(0, pos) = curChroma0[pos * curStride]; +#else totalSAD += abs(predChroma - curChroma0[pos * curStride]); +#endif } +#if JVET_AJ0096_SATD_REORDER_INTRA + m_dbvSadCost->setDistParam(cDistParam, predLeft, reconLeft, pu.cs->sps->getBitDepth(chType), compID, cWidth >= 4 && cHeight >= 4 ? true : false); + totalSAD += (int)cDistParam.distFunc(cDistParam); +#endif } } @@ -21902,6 +21981,19 @@ uint32_t IntraPrediction::xCalculateCCCMcost(const PredictionUnit &pu, const Com const SizeType cWidth = chromaArea.width; const SizeType cHeight = chromaArea.height; +#if JVET_AJ0096_SATD_REORDER_INTRA + const ChannelType chType = toChannelType(compID); + DistParam cDistParam; + cDistParam.applyWeight = false; + static Pel predChromaA[MAX_CU_SIZE]; + static Pel predChromaL[MAX_CU_SIZE]; + PelBuf predTop(predChromaA, cWidth, 1); + PelBuf predLeft(predChromaL, 1, cHeight); + static Pel reconChromaA[MAX_CU_SIZE]; + static Pel reconChromaL[MAX_CU_SIZE]; + PelBuf reconTop(reconChromaA, cWidth, 1); + PelBuf reconLeft(reconChromaL, 1, cHeight); +#endif CodingStructure &cs = *(pu.cs); const CodingUnit &cu = *(pu.cu); @@ -21953,8 +22045,17 @@ uint32_t IntraPrediction::xCalculateCCCMcost(const PredictionUnit &pu, const Com #endif predChroma = ClipPel<Pel>(cccmModel[0].convolve(samples), clpRng); +#if JVET_AJ0096_SATD_REORDER_INTRA + predTop.at(pos, 0) = predChroma; + reconTop.at(pos, 0) = curChroma0[pos]; +#else totalSAD += abs(predChroma - curChroma0[pos]); +#endif } +#if JVET_AJ0096_SATD_REORDER_INTRA + m_dbvSadCost->setDistParam(cDistParam, 
predTop, reconTop, pu.cs->sps->getBitDepth(chType), compID, cWidth >= 4 && cHeight >= 4 ? true : false); + totalSAD += (int)cDistParam.distFunc(cDistParam); +#endif } if (checkLeft) @@ -21989,8 +22090,17 @@ uint32_t IntraPrediction::xCalculateCCCMcost(const PredictionUnit &pu, const Com else #endif predChroma = ClipPel<Pel>(cccmModel[0].convolve(samples), clpRng); +#if JVET_AJ0096_SATD_REORDER_INTRA + predLeft.at(0, pos) = predChroma; + reconLeft.at(0, pos) = curChroma0[pos * curStride]; +#else totalSAD += abs(predChroma - curChroma0[pos * curStride]); +#endif } +#if JVET_AJ0096_SATD_REORDER_INTRA + m_dbvSadCost->setDistParam(cDistParam, predLeft, reconLeft, pu.cs->sps->getBitDepth(chType), compID, cWidth >= 4 && cHeight >= 4 ? true : false); + totalSAD += (int)cDistParam.distFunc(cDistParam); +#endif } return totalSAD; @@ -27274,12 +27384,22 @@ void IntraPrediction::getTmrlList(CodingUnit& cu) distParamSad[1].applyWeight = false; distParamSad[1].useMR = false; +#if JVET_AJ0096_SATD_REORDER_INTRA + m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg + tmrlInfo.uiTemplateLeft, piPred + tmrlInfo.uiTemplateLeft, iOrgStride, uiPredStride, + channelBitDepth, COMPONENT_Y, uiWidth, tmrlInfo.uiTemplateAbove, 0, 1, true); +#else m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg + tmrlInfo.uiTemplateLeft, piPred + tmrlInfo.uiTemplateLeft, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, uiWidth, tmrlInfo.uiTemplateAbove, 0, 1, false); +#endif if (cu.lx()) { +#if JVET_AJ0096_SATD_REORDER_INTRA + m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg + tmrlInfo.uiTemplateAbove * iOrgStride, piPred + tmrlInfo.uiTemplateAbove * uiPredStride, + iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, tmrlInfo.uiTemplateLeft, uiHeight, 0, 1, true); +#else m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg + tmrlInfo.uiTemplateAbove * iOrgStride, piPred + tmrlInfo.uiTemplateAbove * uiPredStride, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, 
tmrlInfo.uiTemplateLeft, uiHeight, 0, 1, false); +#endif } // step-2. define search range. @@ -28253,7 +28373,11 @@ void IntraPrediction::reorderEipCands(const PredictionUnit& pu, static_vector<Ei predTop.at(w, h) = ClipPel(cand.convolve(inputs), clipRng); } } +#if JVET_AJ0096_SATD_REORDER_INTRA + m_dbvSadCost->setDistParam(cDistParam, predTop, recoTop, pu.cs->sps->getBitDepth(chType), compId, true); +#else m_dbvSadCost->setDistParam(cDistParam, predTop, recoTop, pu.cs->sps->getBitDepth(chType), compId, false); +#endif uiCost += cDistParam.distFunc(cDistParam); for (int h = 0; h < blockHeight; h++) @@ -28264,7 +28388,11 @@ void IntraPrediction::reorderEipCands(const PredictionUnit& pu, static_vector<Ei predLeft.at(w, h) = ClipPel(cand.convolve(inputs), clipRng); } } +#if JVET_AJ0096_SATD_REORDER_INTRA + m_dbvSadCost->setDistParam(cDistParam, predLeft, recoLeft, pu.cs->sps->getBitDepth(chType), compId, true); +#else m_dbvSadCost->setDistParam(cDistParam, predLeft, recoLeft, pu.cs->sps->getBitDepth(chType), compId, false); +#endif uiCost += cDistParam.distFunc(cDistParam); updateCandList(model, uiCost, tmpCandList, candCostList, NUM_EIP_MERGE_SIGNAL); diff --git a/source/Lib/CommonLib/RdCost.cpp b/source/Lib/CommonLib/RdCost.cpp index 608a946e414b0145e39ffa646fef1c5a23cffe6a..da0d801cd2ab7c16a936ad2e34b4385ffb9fceb4 100644 --- a/source/Lib/CommonLib/RdCost.cpp +++ b/source/Lib/CommonLib/RdCost.cpp @@ -2346,6 +2346,196 @@ Distortion RdCost::xCalcHADs1xN(const Pel* piOrg, const Pel* piCur, int iStrideO return satd; } #endif + +#if JVET_AJ0096_SATD_REORDER_INTRA || JVET_AJ0096_SATD_REORDER_INTER +Distortion RdCost::xCalcHADs1x16( const Pel *piOrg, const Pel *piCur, int iStrideOrg, int iStrideCur, int iRows, int iCols) +{ /* Cost of one line of 16 samples in a butterfly-transform domain. The 16 differences (piOrg minus piCur) are read along a row when iRows == 1, or down a column using iStrideOrg and iStrideCur when iCols == 1; any other shape reaches the CHECK in the else branch. Returns the scaled sum of absolute butterfly outputs. NOTE(review): sad is a plain int here, while the return type is Distortion and xCalcHADs1x8 accumulates in Distortion; confirm the difference is intended. */ + int j, sad = 0; + int diff[16], m1[16], m2[16]; + if (iRows == 1) + { + diff[0] = piOrg[0] - piCur[0]; + diff[1] = piOrg[1] - piCur[1]; + diff[2] = piOrg[2] - piCur[2]; + diff[3] = piOrg[3] - piCur[3]; + diff[4] = piOrg[4] - piCur[4]; + diff[5] = 
piOrg[5] - piCur[5]; + diff[6] = piOrg[6] - piCur[6]; + diff[7] = piOrg[7] - piCur[7]; + + diff[8] = piOrg[8] - piCur[8]; + diff[9] = piOrg[9] - piCur[9]; + diff[10] = piOrg[10] - piCur[10]; + diff[11] = piOrg[11] - piCur[11]; + diff[12] = piOrg[12] - piCur[12]; + diff[13] = piOrg[13] - piCur[13]; + diff[14] = piOrg[14] - piCur[14]; + diff[15] = piOrg[15] - piCur[15]; + } + else if (iCols == 1) + { + diff[0] = piOrg[0] - piCur[0]; + diff[1] = piOrg[1 * iStrideOrg] - piCur[1 * iStrideCur]; + diff[2] = piOrg[2 * iStrideOrg] - piCur[2 * iStrideCur]; + diff[3] = piOrg[3 * iStrideOrg] - piCur[3 * iStrideCur]; + diff[4] = piOrg[4 * iStrideOrg] - piCur[4 * iStrideCur]; + diff[5] = piOrg[5 * iStrideOrg] - piCur[5 * iStrideCur]; + diff[6] = piOrg[6 * iStrideOrg] - piCur[6 * iStrideCur]; + diff[7] = piOrg[7 * iStrideOrg] - piCur[7 * iStrideCur]; + + diff[8] = piOrg[8 * iStrideOrg] - piCur[8 * iStrideCur]; + diff[9] = piOrg[9 * iStrideOrg] - piCur[9 * iStrideCur]; + diff[10] = piOrg[10 * iStrideOrg] - piCur[10 * iStrideCur]; + diff[11] = piOrg[11 * iStrideOrg] - piCur[11 * iStrideCur]; + diff[12] = piOrg[12 * iStrideOrg] - piCur[12 * iStrideCur]; + diff[13] = piOrg[13 * iStrideOrg] - piCur[13 * iStrideCur]; + diff[14] = piOrg[14 * iStrideOrg] - piCur[14 * iStrideCur]; + diff[15] = piOrg[15 * iStrideOrg] - piCur[15 * iStrideCur]; + } + else + { + CHECK(1, "shall not be here"); + } + /* Butterfly stage 1: sum and difference of elements 8 apart. */ + m2[0] = diff[0] + diff[8]; + m2[1] = diff[1] + diff[9]; + m2[2] = diff[2] + diff[10]; + m2[3] = diff[3] + diff[11]; + m2[4] = diff[4] + diff[12]; + m2[5] = diff[5] + diff[13]; + m2[6] = diff[6] + diff[14]; + m2[7] = diff[7] + diff[15]; + m2[8] = diff[0] - diff[8]; + m2[9] = diff[1] - diff[9]; + m2[10] = diff[2] - diff[10]; + m2[11] = diff[3] - diff[11]; + m2[12] = diff[4] - diff[12]; + m2[13] = diff[5] - diff[13]; + m2[14] = diff[6] - diff[14]; + m2[15] = diff[7] - diff[15]; + /* Butterfly stage 2: sum and difference of elements 4 apart. */ + m1[0] = m2[0] + m2[4]; + m1[1] = m2[1] + m2[5]; + m1[2] = m2[2] + m2[6]; + m1[3] = m2[3] + m2[7]; + m1[4] = m2[0] - 
m2[4]; + m1[5] = m2[1] - m2[5]; + m1[6] = m2[2] - m2[6]; + m1[7] = m2[3] - m2[7]; + m1[8] = m2[8] + m2[12]; + m1[9] = m2[9] + m2[13]; + m1[10] = m2[10] + m2[14]; + m1[11] = m2[11] + m2[15]; + m1[12] = m2[8] - m2[12]; + m1[13] = m2[9] - m2[13]; + m1[14] = m2[10] - m2[14]; + m1[15] = m2[11] - m2[15]; + /* Butterfly stage 3: sum and difference of elements 2 apart. */ + m2[0] = m1[0] + m1[2]; + m2[1] = m1[1] + m1[3]; + m2[2] = m1[0] - m1[2]; + m2[3] = m1[1] - m1[3]; + m2[4] = m1[4] + m1[6]; + m2[5] = m1[5] + m1[7]; + m2[6] = m1[4] - m1[6]; + m2[7] = m1[5] - m1[7]; + m2[8] = m1[8] + m1[10]; + m2[9] = m1[9] + m1[11]; + m2[10] = m1[8] - m1[10]; + m2[11] = m1[9] - m1[11]; + m2[12] = m1[12] + m1[14]; + m2[13] = m1[13] + m1[15]; + m2[14] = m1[12] - m1[14]; + m2[15] = m1[13] - m1[15]; + /* NOTE(review): only three stages are applied to 16 inputs, so adjacent elements are never combined; a complete 16-point Hadamard transform would need a fourth stage (xCalcHADs1x8 uses three stages for 8 points). Confirm whether this is deliberate before changing it, since every consumer of this cost, and any SIMD counterpart if one exists, would have to change in step. */ + for( j = 0; j < 16; j++ ) + { + sad += abs( m2[j] ); + } + /* When JVET_R0164_MEAN_SCALED_SATD is set, the m2[0] term is counted at a quarter of its magnitude (right shift by 2). NOTE(review): after the three stages above m2[0] is the sum of the even-indexed differences only and m2[1] the sum of the odd-indexed ones, so this does not scale the full mean term; verify. */ +#if JVET_R0164_MEAN_SCALED_SATD + sad -= abs(m2[0]); + sad += abs(m2[0]) >> 2; +#endif + sad = ( int ) ( sad / sqrt( 16.0 * 1 ) * 2 ); /* sqrt(16) is 4, so this is half of the sum, truncated by the int cast */ + + return sad; +} + +Distortion RdCost::xCalcHADs1x8( const Pel *piOrg, const Pel *piCur, int iStrideOrg, int iStrideCur, int iRows, int iCols) +{ /* Cost of one line of 8 samples after an 8-point Hadamard (butterfly) transform of the differences (piOrg minus piCur). Addressing is the same as in xCalcHADs1x16: along a row when iRows == 1, down a column using the strides when iCols == 1, otherwise the CHECK in the else branch is reached. */ + int j; + Distortion sad = 0; + int diff[8], m1[8], m2[8]; + if (iRows == 1) + { + diff[0] = piOrg[0] - piCur[0]; + diff[1] = piOrg[1] - piCur[1]; + diff[2] = piOrg[2] - piCur[2]; + diff[3] = piOrg[3] - piCur[3]; + diff[4] = piOrg[4] - piCur[4]; + diff[5] = piOrg[5] - piCur[5]; + diff[6] = piOrg[6] - piCur[6]; + diff[7] = piOrg[7] - piCur[7]; + } + else if (iCols == 1) + { + diff[0] = piOrg[0] - piCur[0]; + diff[1] = piOrg[1 * iStrideOrg] - piCur[1 * iStrideCur]; + diff[2] = piOrg[2 * iStrideOrg] - piCur[2 * iStrideCur]; + diff[3] = piOrg[3 * iStrideOrg] - piCur[3 * iStrideCur]; + diff[4] = piOrg[4 * iStrideOrg] - piCur[4 * iStrideCur]; + diff[5] = piOrg[5 * iStrideOrg] - piCur[5 * iStrideCur]; + diff[6] = piOrg[6 * iStrideOrg] - piCur[6 * iStrideCur]; + diff[7] = piOrg[7 * iStrideOrg] - piCur[7 * iStrideCur]; + } + else + { + CHECK(1, "shall not be here"); + } + /* Butterfly stage 1: sum and difference of elements 4 apart. */ + m2[0] = diff[0] + diff[4]; + m2[1] = diff[1] + 
diff[5]; + m2[2] = diff[2] + diff[6]; + m2[3] = diff[3] + diff[7]; + m2[4] = diff[0] - diff[4]; + m2[5] = diff[1] - diff[5]; + m2[6] = diff[2] - diff[6]; + m2[7] = diff[3] - diff[7]; + /* Butterfly stage 2: sum and difference of elements 2 apart. */ + m1[0] = m2[0] + m2[2]; + m1[1] = m2[1] + m2[3]; + m1[2] = m2[0] - m2[2]; + m1[3] = m2[1] - m2[3]; + m1[4] = m2[4] + m2[6]; + m1[5] = m2[5] + m2[7]; + m1[6] = m2[4] - m2[6]; + m1[7] = m2[5] - m2[7]; + /* Butterfly stage 3: sum and difference of adjacent elements. */ + m2[0] = m1[0] + m1[1]; + m2[1] = m1[0] - m1[1]; + m2[2] = m1[2] + m1[3]; + m2[3] = m1[2] - m1[3]; + m2[4] = m1[4] + m1[5]; + m2[5] = m1[4] - m1[5]; + m2[6] = m1[6] + m1[7]; + m2[7] = m1[6] - m1[7]; + /* Sum of absolute transform outputs; m2[0] holds the sum of all eight differences. */ + for (j = 0; j < 8; j++) + { + sad += abs(m2[j]); + } + /* When JVET_R0164_MEAN_SCALED_SATD is set, the m2[0] (sum of all differences) term is counted at a quarter of its magnitude (right shift by 2). */ +#if JVET_R0164_MEAN_SCALED_SATD + sad -= abs(m2[0]); + sad += abs(m2[0]) >> 2; +#endif + sad = ( int )( sad / sqrt( 8.0 * 1 ) * 2 ); /* scale by 2 over sqrt(8) and truncate via the int cast */ + + return sad; +} +#endif + Distortion RdCost::xCalcHADs2x2( const Pel *piOrg, const Pel *piCur, int iStrideOrg, int iStrideCur, int iStep ) { Distortion satd = 0; @@ -3134,6 +3324,40 @@ Distortion RdCost::xGetHADs( const DistParam &rcDtParam ) piCur += iOffsetCur; } } +#if JVET_AJ0096_SATD_REORDER_INTRA || JVET_AJ0096_SATD_REORDER_INTER /* Added branches for blocks that are one sample high or one sample wide: 1x16 costs are accumulated when the long side is a multiple of 16, otherwise 1x8 costs when it is a multiple of 8. */ + else if (iRows == 1 && iCols % 16 == 0) + { + for( x = 0; x < iCols; x += 16 ) + { + uiSum += xCalcHADs1x16(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iRows, 16); + } + } + else if (iCols == 1 && iRows % 16 == 0) + { + for( y = 0; y < iRows; y += 16 ) + { + uiSum += xCalcHADs1x16( &piOrg[0], &piCur[0], iStrideOrg, iStrideCur, 16, iCols ); + piOrg += (iStrideOrg << 4); + piCur += (iStrideCur << 4); /* both pointers move down 16 rows for the next segment */ + } + } + else if (iRows == 1 && iCols % 8 == 0) + { + for( x = 0; x < iCols; x += 8 ) + { + uiSum += xCalcHADs1x8(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iRows, 8); + } + } + else if (iCols == 1 && iRows % 8 == 0) + { + for( y = 0; y < iRows; y += 8 ) + { + uiSum += xCalcHADs1x8(&piOrg[0], &piCur[0], iStrideOrg, iStrideCur, 8, iCols); + piOrg += (iStrideOrg << 3); + piCur += (iStrideCur << 3); /* both pointers move down 8 rows for the next segment */ + } + } +#endif #if JVET_AI0185_ADAPTIVE_COST_IN_MERGE_MODE else if 
(iRows == 1 || iCols == 1) { diff --git a/source/Lib/CommonLib/RdCost.h b/source/Lib/CommonLib/RdCost.h index 57dce9fb2a65c31e779ce35f9b03005c6c5e6df7..b767de0b2e8bed135ddba8100eed678d26274151 100644 --- a/source/Lib/CommonLib/RdCost.h +++ b/source/Lib/CommonLib/RdCost.h @@ -1097,6 +1097,10 @@ private: static Distortion xGetHADs ( const DistParam& pcDtParam ); #if JVET_AI0185_ADAPTIVE_COST_IN_MERGE_MODE static Distortion xCalcHADs1xN ( const Pel* piOrg, const Pel* piCurr, int iStrideOrg, int iStrideCur, int iRows, int iCols); +#endif +#if JVET_AJ0096_SATD_REORDER_INTRA || JVET_AJ0096_SATD_REORDER_INTER + static Distortion xCalcHADs1x16 ( const Pel *piOrg, const Pel *piCur, int iStrideOrg, int iStrideCur, int iRows, int iCols); + static Distortion xCalcHADs1x8 ( const Pel *piOrg, const Pel *piCur, int iStrideOrg, int iStrideCur, int iRows, int iCols); #endif static Distortion xCalcHADs2x2 ( const Pel *piOrg, const Pel *piCurr, int iStrideOrg, int iStrideCur, int iStep ); static Distortion xCalcHADs4x4 ( const Pel *piOrg, const Pel *piCurr, int iStrideOrg, int iStrideCur, int iStep ); diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 8d1d8515f3e526193f87fb87f5295c1cfa3bf573..58801c1d722b0b5c3cbf1d812f221799831d32b6 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -51,8 +51,6 @@ #include <cassert> #include <cstdint> - - #define BASE_ENCODER 1 #define BASE_NORMATIVE 1 #define TOOLS 1 @@ -258,6 +256,7 @@ #define JVET_AE0094_IBC_NONADJACENT_SPATIAL_CANDIDATES 1 // JVET-AE0094: IBC with non-adjacent spatial candidates #define JVET_AG0091_ARBVP 1 // JVET-AG0091: Auto-relocated block vector prediction #define JVET_AI0082_TEMPORAL_BV 1 // JVET-AI0081: Temporal BV for IBC merge list construction +#define JVET_AJ0096_SATD_REORDER_INTRA 1 // JVET-AJ0096: SATD-based reordering for intra coding #if JVET_AC0071_DBV && JVET_V0130_INTRA_TMP #define JVET_AF0066_ENABLE_DBV_4_SINGLE_TREE 1 // JVET-AF0066: Enable DBV 
mode in single tree configuration @@ -363,6 +362,7 @@ #define JVET_AI0185_ADAPTIVE_COST_IN_MERGE_MODE 1 // JVET-AI0185 adaptive cost function selection in merge mode #define JVET_AI0183_MVP_EXTENSION 1 // JVET-AI0183 MVP extension // Inter template matching tools +#define JVET_AJ0096_SATD_REORDER_INTER 1 // JVET-AJ0096: SATD-based reordering for inter coding #define ENABLE_INTER_TEMPLATE_MATCHING 1 // It controls whether template matching is enabled for inter prediction #if ENABLE_INTER_TEMPLATE_MATCHING #define TM_AMVP 1 // Add template matching to non-subblock inter to refine regular AMVP candidates diff --git a/source/Lib/CommonLib/x86/RdCostX86.h b/source/Lib/CommonLib/x86/RdCostX86.h index 8cac6fc5ca594bb3612633e03ad7f705135a879f..5a877a07ed810536e120974461d856b25f70f7a3 100644 --- a/source/Lib/CommonLib/x86/RdCostX86.h +++ b/source/Lib/CommonLib/x86/RdCostX86.h @@ -2102,6 +2102,146 @@ static uint32_t xCalcHAD16x8_AVX2( const Torg *piOrg, const Tcur *piCur, const i return (sad); } +#if JVET_AJ0096_SATD_REORDER_INTRA || JVET_AJ0096_SATD_REORDER_INTER +static uint32_t xCalcHADs1x16_SSE(const Torg* piOrg, const Tcur* piCur, int iStrideOrg, int iStrideCur, int iRows, int iCols) +{ + __m128i diff[4], m1[4], m2[4]; + + if (iRows == 1) + { + __m128i org0 = _mm_loadu_si128((__m128i*)&piOrg[0]); + __m128i cur0 = _mm_loadu_si128((__m128i*)&piCur[0]); + diff[0] = _mm_sub_epi16(org0, cur0); + diff[1] = _mm_cvtepi16_epi32( _mm_srli_si128( diff[0], 8 ) ); + diff[0] = _mm_cvtepi16_epi32( diff[0] ); + + org0 = _mm_loadu_si128((__m128i*)&piOrg[8]); + cur0 = _mm_loadu_si128((__m128i*)&piCur[8]); + diff[2] = _mm_sub_epi16(org0, cur0); + diff[3] = _mm_cvtepi16_epi32( _mm_srli_si128( diff[2], 8 ) ); + diff[2] = _mm_cvtepi16_epi32( diff[2] ); + } + else if (iCols == 1) + { + Pel diffI[16]; + for (int i = 0; i < 16; i++) + { + diffI[i] = piOrg[0] - piCur[0]; + piOrg += iStrideOrg; + piCur += iStrideCur; + } + diff[0] = _mm_loadu_si128((__m128i*)&diffI[0]); + diff[1] = 
_mm_cvtepi16_epi32( _mm_srli_si128( diff[0], 8 ) ); + diff[0] = _mm_cvtepi16_epi32( diff[0] ); + diff[2] = _mm_loadu_si128((__m128i*)&diffI[8]); + diff[3] = _mm_cvtepi16_epi32( _mm_srli_si128( diff[2], 8 ) ); + diff[2] = _mm_cvtepi16_epi32( diff[2] ); + } + else + { + std::cerr << "shall not be here" << std::endl; + return -1; + } + + m2[0] = _mm_add_epi32(diff[0], diff[2]); + m2[1] = _mm_add_epi32(diff[1], diff[3]); + m2[2] = _mm_sub_epi32(diff[0], diff[2]); + m2[3] = _mm_sub_epi32(diff[1], diff[3]); + + m1[0] = _mm_add_epi32(m2[0], m2[1]); + m1[1] = _mm_sub_epi32(m2[0], m2[1]); + m1[2] = _mm_add_epi32(m2[2], m2[3]); + m1[3] = _mm_sub_epi32(m2[2], m2[3]); + + m2[0] = _mm_unpacklo_epi32(m1[0], m1[1]); + m2[1] = _mm_unpackhi_epi32(m1[0], m1[1]); + m2[2] = _mm_abs_epi32(_mm_add_epi32(m2[0], m2[1])); +#if JVET_R0164_MEAN_SCALED_SATD + uint32_t absDc = _mm_cvtsi128_si32(m2[2]); +#endif + m2[3] = _mm_abs_epi32(_mm_sub_epi32(m2[0], m2[1])); + __m128i iSum = _mm_add_epi32(m2[2], m2[3]); + m2[0] = _mm_unpacklo_epi32(m1[2], m1[3]); + m2[1] = _mm_unpackhi_epi32(m1[2], m1[3]); + m2[2] = _mm_abs_epi32(_mm_add_epi32(m2[0], m2[1])); + m2[3] = _mm_abs_epi32(_mm_sub_epi32(m2[0], m2[1])); + iSum = _mm_add_epi32(iSum, m2[2]); + iSum = _mm_add_epi32(iSum, m2[3]); + iSum = _mm_add_epi32(iSum, _mm_shuffle_epi32(iSum, 0x4e)); // 01001110 + iSum = _mm_add_epi32(iSum, _mm_shuffle_epi32(iSum, 0xb1)); // 10110001 + uint32_t sad = _mm_cvtsi128_si32( iSum ); + +#if JVET_R0164_MEAN_SCALED_SATD + sad -= absDc; + sad += absDc >> 2; +#endif + sad = sad >> 1; + + return sad; +} + +static uint32_t xCalcHADs1x8_SSE(const Torg* piOrg, const Tcur* piCur, int iStrideOrg, int iStrideCur, int iRows, int iCols) +{ + __m128i diff[2], m1[2], m2[2]; + + if (iRows == 1) + { + __m128i org0 = _mm_loadu_si128((__m128i*)&piOrg[0]); + __m128i cur0 = _mm_loadu_si128((__m128i*)&piCur[0]); + diff[0] = _mm_sub_epi16(org0, cur0); + diff[1] = _mm_cvtepi16_epi32( _mm_srli_si128( diff[0], 8 ) ); + diff[0] = 
_mm_cvtepi16_epi32( diff[0] ); + } + else if (iCols == 1) + { + Pel diffI[8]; + for (int i = 0; i < 8; i++) + { + diffI[i] = piOrg[0] - piCur[0]; + piOrg += iStrideOrg; + piCur += iStrideCur; + } + diff[0] = _mm_loadu_si128((__m128i*)&diffI[0]); + diff[1] = _mm_cvtepi16_epi32( _mm_srli_si128( diff[0], 8 ) ); + diff[0] = _mm_cvtepi16_epi32( diff[0] ); + } + else + { + std::cerr << "shall not be here" << std::endl; + return -1; + } + + m2[0] = _mm_add_epi32(diff[0], diff[1]); + m2[1] = _mm_sub_epi32(diff[0], diff[1]); + + m1[0] = _mm_unpacklo_epi32(m2[0], m2[1]); + m1[1] = _mm_unpackhi_epi32(m2[0], m2[1]); + m2[0] = _mm_add_epi32(m1[0], m1[1]); + m2[1] = _mm_sub_epi32(m1[0], m1[1]); + + m1[0] = _mm_unpacklo_epi32(m2[0], m2[1]); + m1[1] = _mm_unpackhi_epi32(m2[0], m2[1]); + m2[0] = _mm_abs_epi32(_mm_add_epi32(m1[0], m1[1])); +#if JVET_R0164_MEAN_SCALED_SATD + uint32_t absDc = _mm_cvtsi128_si32(m2[0]); +#endif + m2[1] = _mm_abs_epi32(_mm_sub_epi32(m1[0], m1[1])); + + __m128i iSum = _mm_add_epi32(m2[0], m2[1]); + iSum = _mm_add_epi32(iSum, _mm_shuffle_epi32(iSum, 0x4e)); // 01001110 + iSum = _mm_add_epi32(iSum, _mm_shuffle_epi32(iSum, 0xb1)); // 10110001 + uint32_t sad = _mm_cvtsi128_si32( iSum ); + +#if JVET_R0164_MEAN_SCALED_SATD + sad -= absDc; + sad += absDc >> 2; +#endif + sad = ( int ) ( sad / sqrt( 8.0 * 1 ) * 2 ); + + return sad; +} +#endif + static uint32_t xCalcHAD8x16_AVX2( const Pel* piOrg, const Pel* piCur, const int iStrideOrg, const int iStrideCur, const int iBitDepth ) { @@ -2570,6 +2710,40 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam ) piCur += iOffsetCur; } } +#if JVET_AJ0096_SATD_REORDER_INTRA || JVET_AJ0096_SATD_REORDER_INTER + else if (iRows == 1 && iCols % 16 == 0) + { + for( x = 0; x < iCols; x += 16 ) + { + uiSum += xCalcHADs1x16_SSE(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iRows, 16); + } + } + else if (iCols == 1 && iRows % 16 == 0) + { + for( y = 0; y < iRows; y += 16 ) + { + uiSum += xCalcHADs1x16_SSE( &piOrg[0], 
&piCur[0], iStrideOrg, iStrideCur, 16, iCols ); + piOrg += (iStrideOrg << 4); + piCur += (iStrideCur << 4); + } + } + else if (iRows == 1 && iCols % 8 == 0) + { + for( x = 0; x < iCols; x += 8 ) + { + uiSum += xCalcHADs1x8_SSE(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur, iRows, 8); + } + } + else if (iCols == 1 && iRows % 8 == 0) + { + for( y = 0; y < iRows; y += 8 ) + { + uiSum += xCalcHADs1x8_SSE(&piOrg[0], &piCur[0], iStrideOrg, iStrideCur, 8, iCols); + piOrg += (iStrideOrg << 3); + piCur += (iStrideCur << 3); + } + } +#endif #if JVET_AI0185_ADAPTIVE_COST_IN_MERGE_MODE else if (iRows == 1 || iCols == 1) {