Commit 00818548 authored by Yu-Chi Su

JVET_L0646 GBi: remove type prefixes/unused code

parent d020de2a
......@@ -221,77 +221,40 @@ void removeWeightHighFreq_SSE(int16_t* src0, int src0Stride, const int16_t* src1
int offset = 1 << (shift - 1);
if (W == 8)
{
#if 0//USE_AVX2
if (vext >= AVX2)
{
__m256i vzero = _mm256_setzero_si256();
__m256i voffset = _mm256_set1_epi32(offset);
__m256i vw0 = _mm256_set1_epi32(weight0);
__m256i vw1 = _mm256_set1_epi32(weight1);
for (int row = 0; row < height; row++)
{
for (int col = 0; col < width; col += 8)
{
__m256i vsrc0, vsrc1;
__m128i a = _mm_load_si128((const __m128i *)&src0[col]);
__m128i b = _mm_load_si128((const __m128i *)&src1[col]);
vsrc0 = _mm256_cvtepi16_epi32(a);
vsrc1 = _mm256_cvtepi16_epi32(b);
vsrc0 = _mm256_mullo_epi32(vsrc0, vw0);
vsrc1 = _mm256_mullo_epi32(vsrc1, vw1);
vsrc0 = _mm256_add_epi32(_mm256_sub_epi32(vsrc0, vsrc1), voffset);
vsrc0 = _mm256_srai_epi32(vsrc0, shift);
vsrc0 = _mm256_packs_epi32(vsrc0, vzero);
_mm_store_si128((__m128i *)&src0[col], _mm256_castsi256_si128(vsrc0));
}
__m128i vzero = _mm_setzero_si128();
__m128i voffset = _mm_set1_epi32(offset);
__m128i vw0 = _mm_set1_epi32(weight0);
__m128i vw1 = _mm_set1_epi32(weight1);
src0 += src0Stride;
src1 += src1Stride;
}
}
else
#endif
for (int row = 0; row < height; row++)
{
__m128i vzero = _mm_setzero_si128();
__m128i voffset = _mm_set1_epi32(offset);
__m128i vw0 = _mm_set1_epi32(weight0);
__m128i vw1 = _mm_set1_epi32(weight1);
for (int row = 0; row < height; row++)
for (int col = 0; col < width; col += 8)
{
for (int col = 0; col < width; col += 8)
{
__m128i vsrc0 = _mm_load_si128((const __m128i *)&src0[col]);
__m128i vsrc1 = _mm_load_si128((const __m128i *)&src1[col]);
__m128i vtmp, vdst, vsrc;
vdst = _mm_cvtepi16_epi32(vsrc0);
vsrc = _mm_cvtepi16_epi32(vsrc1);
vdst = _mm_mullo_epi32(vdst, vw0);
vsrc = _mm_mullo_epi32(vsrc, vw1);
vtmp = _mm_add_epi32(_mm_sub_epi32(vdst, vsrc), voffset);
vtmp = _mm_srai_epi32(vtmp, shift);
vsrc0 = _mm_unpackhi_epi64(vsrc0, vzero);
vsrc1 = _mm_unpackhi_epi64(vsrc1, vzero);
vdst = _mm_cvtepi16_epi32(vsrc0);
vsrc = _mm_cvtepi16_epi32(vsrc1);
vdst = _mm_mullo_epi32(vdst, vw0);
vsrc = _mm_mullo_epi32(vsrc, vw1);
vdst = _mm_add_epi32(_mm_sub_epi32(vdst, vsrc), voffset);
vdst = _mm_srai_epi32(vdst, shift);
vdst = _mm_packs_epi32(vtmp, vdst);
_mm_store_si128((__m128i *)&src0[col], vdst);
}
src0 += src0Stride;
src1 += src1Stride;
__m128i vsrc0 = _mm_load_si128((const __m128i *)&src0[col]);
__m128i vsrc1 = _mm_load_si128((const __m128i *)&src1[col]);
__m128i vtmp, vdst, vsrc;
vdst = _mm_cvtepi16_epi32(vsrc0);
vsrc = _mm_cvtepi16_epi32(vsrc1);
vdst = _mm_mullo_epi32(vdst, vw0);
vsrc = _mm_mullo_epi32(vsrc, vw1);
vtmp = _mm_add_epi32(_mm_sub_epi32(vdst, vsrc), voffset);
vtmp = _mm_srai_epi32(vtmp, shift);
vsrc0 = _mm_unpackhi_epi64(vsrc0, vzero);
vsrc1 = _mm_unpackhi_epi64(vsrc1, vzero);
vdst = _mm_cvtepi16_epi32(vsrc0);
vsrc = _mm_cvtepi16_epi32(vsrc1);
vdst = _mm_mullo_epi32(vdst, vw0);
vsrc = _mm_mullo_epi32(vsrc, vw1);
vdst = _mm_add_epi32(_mm_sub_epi32(vdst, vsrc), voffset);
vdst = _mm_srai_epi32(vdst, shift);
vdst = _mm_packs_epi32(vtmp, vdst);
_mm_store_si128((__m128i *)&src0[col], vdst);
}
src0 += src0Stride;
src1 += src1Stride;
}
}
else if (W == 4)
......
......@@ -888,8 +888,8 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
if( cu.cs->sps->getSpsNext().getUseGBi() && cu.GBiIdx == GBI_DEFAULT && cu.cs->slice->isInterB() )
{
const bool checkIdentical = true;
m_cUniMotions.setReadMode(checkIdentical, (uint32_t)iRefList, (uint32_t)iRefIdxTemp);
m_cUniMotions.copyFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint32_t)iRefList, (uint32_t)iRefIdxTemp);
m_uniMotions.setReadMode(checkIdentical, (uint32_t)iRefList, (uint32_t)iRefIdxTemp);
m_uniMotions.copyFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint32_t)iRefList, (uint32_t)iRefIdxTemp);
}
#endif
xCopyAMVPInfo( &amvp[eRefPicList], &aacAMVPInfo[iRefList][iRefIdxTemp]); // must always be done ( also when AMVP_MODE = AM_NONE )
......@@ -2778,8 +2778,8 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu,
#if JVET_L0646_GBI
if(pu.cu->cs->sps->getSpsNext().getUseGBi() && pu.cu->GBiIdx == GBI_DEFAULT && pu.cu->slice->isInterB())
{
m_cUniMotions.setReadModeAffine(true, (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType);
m_cUniMotions.copyAffineMvFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType
m_uniMotions.setReadModeAffine(true, (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType);
m_uniMotions.copyAffineMvFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType
);
}
#endif
......@@ -4727,9 +4727,9 @@ double InterSearch::xGetMEDistortionWeight(uint8_t gbiIdx, RefPicList eRefPicLis
}
bool InterSearch::xReadBufferedUniMv(PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv& pcMvPred, Mv& rcMv, uint32_t& ruiBits, Distortion& ruiCost)
{
if (m_cUniMotions.isReadMode((uint32_t)eRefPicList, (uint32_t)iRefIdx))
if (m_uniMotions.isReadMode((uint32_t)eRefPicList, (uint32_t)iRefIdx))
{
m_cUniMotions.copyTo(rcMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx);
m_uniMotions.copyTo(rcMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx);
m_pcRdCost->setPredictor(pcMvPred);
m_pcRdCost->setCostScale(0);
......@@ -4745,9 +4745,9 @@ bool InterSearch::xReadBufferedUniMv(PredictionUnit& pu, RefPicList eRefPicList,
}
bool InterSearch::xReadBufferedAffineUniMv(PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv acMvPred[3], Mv acMv[3], uint32_t& ruiBits, Distortion& ruiCost)
{
if (m_cUniMotions.isReadModeAffine((uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType))
if (m_uniMotions.isReadModeAffine((uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType))
{
m_cUniMotions.copyAffineMvTo(acMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType);
m_uniMotions.copyAffineMvTo(acMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType);
m_pcRdCost->setCostScale(0);
uint32_t uiMvBits = 0;
......@@ -4774,7 +4774,7 @@ void InterSearch::initWeightIdxBits()
{
for (int n = 0; n < GBI_NUM; ++n)
{
m_auiEstWeightIdxBits[n] = deriveWeightIdxBits(n);
m_estWeightIdxBits[n] = deriveWeightIdxBits(n);
}
}
#endif
......
......@@ -82,8 +82,8 @@ private:
ClpRng m_lumaClpRng;
#if JVET_L0646_GBI
uint32_t m_auiEstWeightIdxBits[GBI_NUM];
GBiMotionParam m_cUniMotions;
uint32_t m_estWeightIdxBits[GBI_NUM];
GBiMotionParam m_uniMotions;
bool m_affineModeSelected;
#endif
......@@ -360,8 +360,8 @@ protected:
double xGetMEDistortionWeight ( uint8_t gbiIdx, RefPicList eRefPicList);
bool xReadBufferedUniMv ( PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv& pcMvPred, Mv& rcMv, uint32_t& ruiBits, Distortion& ruiCost);
public:
void resetBufferedUniMotions () { m_cUniMotions.reset(); }
uint32_t getWeightIdxBits ( uint8_t gbiIdx ) { return m_auiEstWeightIdxBits[gbiIdx]; }
void resetBufferedUniMotions () { m_uniMotions.reset(); }
uint32_t getWeightIdxBits ( uint8_t gbiIdx ) { return m_estWeightIdxBits[gbiIdx]; }
void initWeightIdxBits ();
protected:
#endif
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment