diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index 13dfb3c8dfc67debb172adbb2a5d20d70906425e..eb10c3621c63b3228147774a6510bba4cc6d766b 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -3407,56 +3407,56 @@ void PU::spanGeoMotionInfo( PredictionUnit &pu, MergeCtx &geoMrgCtx, const uint8 int tpmMask = 0; int lookUpY = 0, motionIdx = 0; bool isFlip = angle >= 13 && angle <= 27; - int distanceIdx = g_GeoParams[splitDir][1]; - int distanceX = angle; - int distanceY = (distanceX + (GEO_NUM_ANGLES >> 2)) % GEO_NUM_ANGLES; - int offsetX = ( - (int)pu.lwidth()) >> 1; - int offsetY = ( - (int)pu.lheight()) >> 1; - if (distanceIdx > 0) - { - if (angle % 16 == 8 || (angle % 16 != 0 && pu.lheight() >= pu.lwidth())) + int distanceIdx = g_GeoParams[splitDir][1]; + int distanceX = angle; + int distanceY = (distanceX + (GEO_NUM_ANGLES >> 2)) % GEO_NUM_ANGLES; + int offsetX = (-(int)pu.lwidth()) >> 1; + int offsetY = (-(int)pu.lheight()) >> 1; + if (distanceIdx > 0) + { + if (angle % 16 == 8 || (angle % 16 != 0 && pu.lheight() >= pu.lwidth())) offsetY += angle < 16 ? ((distanceIdx * pu.lheight()) >> 3) : -(int)((distanceIdx * pu.lheight()) >> 3); - else + else offsetX += angle < 16 ? ((distanceIdx * pu.lwidth()) >> 3) : -(int)((distanceIdx * pu.lwidth()) >> 3); - } + } for (int y = 0; y < mb.height; y++) { - lookUpY = (((4*y + offsetY) << 1) + 5) * g_Dis[distanceY]; - for (int x = 0; x < mb.width; x++) - { - motionIdx = (((4*x + offsetX) << 1) + 5) * g_Dis[distanceX] + lookUpY; - tpmMask = abs(motionIdx) < 32 ? 2 : ( motionIdx<=0 ? (1 - isFlip):isFlip); - if (tpmMask == 2) - { - mb.at(x, y).isInter = true; - mb.at(x, y).interDir = biMv.interDir; - mb.at(x, y).refIdx[0] = biMv.refIdx[0]; - mb.at(x, y).refIdx[1] = biMv.refIdx[1]; - mb.at(x, y).mv[0] = biMv.mv[0]; - mb.at(x, y).mv[1] = biMv.mv[1]; - mb.at(x, y).sliceIdx = biMv.sliceIdx; - } - else if (tpmMask == 0) - { - mb.at(x, y).isInter = true; - mb.at(x, y).interDir = geoMrgCtx.interDirNeighbours[candIdx0]; - mb.at(x, y).refIdx[0] = geoMrgCtx.mvFieldNeighbours[candIdx0 << 1].refIdx; - mb.at(x, y).refIdx[1] = geoMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].refIdx; - mb.at(x, y).mv[0] = geoMrgCtx.mvFieldNeighbours[candIdx0 << 1].mv; - mb.at(x, y).mv[1] = geoMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].mv; - mb.at(x, y).sliceIdx = biMv.sliceIdx; - } - else - { - mb.at(x, y).isInter = true; - mb.at(x, y).interDir = geoMrgCtx.interDirNeighbours[candIdx1]; - mb.at(x, y).refIdx[0] = geoMrgCtx.mvFieldNeighbours[candIdx1 << 1].refIdx; - mb.at(x, y).refIdx[1] = geoMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].refIdx; - mb.at(x, y).mv[0] = geoMrgCtx.mvFieldNeighbours[candIdx1 << 1].mv; - mb.at(x, y).mv[1] = geoMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].mv; - mb.at(x, y).sliceIdx = biMv.sliceIdx; - } - } + lookUpY = (((4 * y + offsetY) << 1) + 5) * g_Dis[distanceY]; + for (int x = 0; x < mb.width; x++) + { + motionIdx = (((4 * x + offsetX) << 1) + 5) * g_Dis[distanceX] + lookUpY; + tpmMask = abs(motionIdx) < 32 ? 2 : (motionIdx <= 0 ? (1 - isFlip) : isFlip); + if (tpmMask == 2) + { + mb.at(x, y).isInter = true; + mb.at(x, y).interDir = biMv.interDir; + mb.at(x, y).refIdx[0] = biMv.refIdx[0]; + mb.at(x, y).refIdx[1] = biMv.refIdx[1]; + mb.at(x, y).mv[0] = biMv.mv[0]; + mb.at(x, y).mv[1] = biMv.mv[1]; + mb.at(x, y).sliceIdx = biMv.sliceIdx; + } + else if (tpmMask == 0) + { + mb.at(x, y).isInter = true; + mb.at(x, y).interDir = geoMrgCtx.interDirNeighbours[candIdx0]; + mb.at(x, y).refIdx[0] = geoMrgCtx.mvFieldNeighbours[candIdx0 << 1].refIdx; + mb.at(x, y).refIdx[1] = geoMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].refIdx; + mb.at(x, y).mv[0] = geoMrgCtx.mvFieldNeighbours[candIdx0 << 1].mv; + mb.at(x, y).mv[1] = geoMrgCtx.mvFieldNeighbours[(candIdx0 << 1) + 1].mv; + mb.at(x, y).sliceIdx = biMv.sliceIdx; + } + else + { + mb.at(x, y).isInter = true; + mb.at(x, y).interDir = geoMrgCtx.interDirNeighbours[candIdx1]; + mb.at(x, y).refIdx[0] = geoMrgCtx.mvFieldNeighbours[candIdx1 << 1].refIdx; + mb.at(x, y).refIdx[1] = geoMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].refIdx; + mb.at(x, y).mv[0] = geoMrgCtx.mvFieldNeighbours[candIdx1 << 1].mv; + mb.at(x, y).mv[1] = geoMrgCtx.mvFieldNeighbours[(candIdx1 << 1) + 1].mv; + mb.at(x, y).sliceIdx = biMv.sliceIdx; + } + } } } #endif diff --git a/source/Lib/CommonLib/x86/RdCostX86.h b/source/Lib/CommonLib/x86/RdCostX86.h index 3bdfec3f7484c548092b04601df1a593cff97089..b11df9504448ceb4db9fd248851b1c38ff52736d 100644 --- a/source/Lib/CommonLib/x86/RdCostX86.h +++ b/source/Lib/CommonLib/x86/RdCostX86.h @@ -2004,18 +2004,18 @@ Distortion RdCost::xGetSADwMask_SIMD( const DistParam &rcDtParam ) { __m256i vsrc1 = _mm256_lddqu_si256( ( __m256i* )( &src1[x] ) ); __m256i vsrc2 = _mm256_lddqu_si256( ( __m256i* )( &src2[x] ) ); - __m256i vmask; - if ( rcDtParam.stepX == -1 ) - { - vmask = _mm256_lddqu_si256((__m256i*)((&weightMask[x]) - (x << 1) - (16 - 1))); - const __m256i shuffle_mask = _mm256_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - vmask = _mm256_shuffle_epi8(vmask, shuffle_mask); - vmask = _mm256_permute4x64_epi64 (vmask, _MM_SHUFFLE(1, 0, 3, 2)); - } - else - { - vmask = _mm256_lddqu_si256((__m256i*)(&weightMask[x])); - } + __m256i vmask; + if (rcDtParam.stepX == -1) + { + vmask = _mm256_lddqu_si256((__m256i*)((&weightMask[x]) - (x << 1) - (16 - 1))); + const __m256i shuffle_mask = _mm256_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + vmask = _mm256_shuffle_epi8(vmask, shuffle_mask); + vmask = _mm256_permute4x64_epi64(vmask, _MM_SHUFFLE(1, 0, 3, 2)); + } + else + { + vmask = _mm256_lddqu_si256((__m256i*)(&weightMask[x])); + } vsum32 = _mm256_add_epi32( vsum32, _mm256_madd_epi16( vmask, _mm256_abs_epi16( _mm256_sub_epi16( vsrc1, vsrc2 ) ) ) ); } src1 += strideSrc1; @@ -2038,17 +2038,17 @@ Distortion RdCost::xGetSADwMask_SIMD( const DistParam &rcDtParam ) { __m128i vsrc1 = _mm_loadu_si128( ( const __m128i* )( &src1[x] ) ); __m128i vsrc2 = _mm_lddqu_si128( ( const __m128i* )( &src2[x] ) ); - __m128i vmask; - if (rcDtParam.stepX == -1) - { - vmask = _mm_lddqu_si128((__m128i*)((&weightMask[x]) - (x << 1) - (8 - 1))); - const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - vmask = _mm_shuffle_epi8(vmask, shuffle_mask); - } - else - { - vmask = _mm_lddqu_si128((const __m128i*)(&weightMask[x])); - } + __m128i vmask; + if (rcDtParam.stepX == -1) + { + vmask = _mm_lddqu_si128((__m128i*)((&weightMask[x]) - (x << 1) - (8 - 1))); + const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + vmask = _mm_shuffle_epi8(vmask, shuffle_mask); + } + else + { + vmask = _mm_lddqu_si128((const __m128i*)(&weightMask[x])); + } vsum32 = _mm_add_epi32( vsum32, _mm_madd_epi16( vmask, _mm_abs_epi16( _mm_sub_epi16( vsrc1, vsrc2 ) ) ) ); } src1 += strideSrc1; diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index 67392013355bf03d44b1e943372274dbe2241f22..a76d7bde584189801147ae611dc38374e832a159 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -188,7 +188,7 @@ protected: #if !JVET_Q0806 bool m_bNoTriangleConstraintFlag; #else - bool m_bNoGeoConstraintFlag; + bool m_noGeoConstraintFlag; #endif bool m_bNoLadfConstraintFlag; bool m_noTransformSkipConstraintFlag; diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h index b1bb7a77edf28890da79d5183d20934e02c07c39..91e22c548f69522b0dbbf969aa59974df7ff86d8 100644 --- a/source/Lib/EncoderLib/EncCu.h +++ b/source/Lib/EncoderLib/EncCu.h @@ -89,13 +89,13 @@ struct GeoMergeCombo }; struct GeoMotionInfo { - uint8_t m_candIdx0; - uint8_t m_candIdx1; + uint8_t m_candIdx0; + uint8_t m_candIdx1; - GeoMotionInfo(uint8_t candIdx0, uint8_t candIdx1) : m_candIdx0(candIdx0), m_candIdx1(candIdx1) { } - GeoMotionInfo() { m_candIdx0 = m_candIdx1 = 0; } + GeoMotionInfo(uint8_t candIdx0, uint8_t candIdx1) : m_candIdx0(candIdx0), m_candIdx1(candIdx1) { } + GeoMotionInfo() { m_candIdx0 = m_candIdx1 = 0; } }; -struct smaller_than_combo_cost +struct SmallerThanComboCost { inline bool operator() (const GeoMergeCombo& first, const GeoMergeCombo& second) { @@ -108,7 +108,7 @@ public: GeoComboCostList() {}; ~GeoComboCostList() {}; std::vector<GeoMergeCombo> list; - void sortByCost() { std::sort(list.begin(), list.end(), smaller_than_combo_cost()); }; + void sortByCost() { std::sort(list.begin(), list.end(), SmallerThanComboCost()); }; }; struct SingleGeoMergeEntry {