...
 
Commits (170)
CacheEnable : 1
CacheLineSize : 256
NumCacheLine : 64
NumWay : 4
CacheAddrMode : 0
FrameReport :
CacheEnable : 1
CacheLineSize : 256
NumCacheLine : 64
NumWay : 4
CacheAddrMode : 1
BlkWidth : 16
BlkHeight : 16
FrameReport : 0
......@@ -9,11 +9,6 @@ Profile : next
MaxCUWidth : 64 # Maximum coding unit width in pixel
MaxCUHeight : 64 # Maximum coding unit height in pixel
MaxPartitionDepth : 4 # Maximum coding unit depth
# obsoleted by TULog2MaxSize
#QuadtreeTULog2MinSize : 2 # Log2 of minimum transform size for
# # quadtree-based TU coding (2...6)
QuadtreeTUMaxDepthInter : 3
QuadtreeTUMaxDepthIntra : 3
#======== Coding Structure =============
IntraPeriod : 1 # Period of I-Frame ( -1 = only first)
......@@ -31,7 +26,7 @@ FDM : 1 # Fast Decision for Merge RD cost
#======== Quantization =============
QP : 32 # Quantization parameter(0-51)
MaxDeltaQP : 0 # CU-based multi-QP optimization
MaxCuDQPDepth : 0 # Max depth of a minimum CuDQP for sub-LCU-level delta QP
MaxCuDQPSubdiv : 0 # Maximum subdiv for CU luma Qp adjustment
DeltaQpRD : 0 # Slice-based multi-QP optimization
RDOQ : 1 # RDOQ
RDOQTS : 1 # RDOQ for transform skip
......@@ -89,7 +84,6 @@ TemporalSubsampleRatio : 8
# General
CTUSize : 128
LCTUFast : 1
QuadtreeTULog2MaxSize : 6
DualITree : 1 # separate partitioning of luma and chroma channels for I-slices
MinQTLumaISlice : 8
......@@ -117,6 +111,7 @@ LumaReshapeEnable : 1 # luma reshaping. 0: disable 1:enable
# Fast tools
PBIntraFast : 1
ISPFast : 1
FastMrg : 1
AMaxBT : 1
......
......@@ -9,11 +9,6 @@ Profile : next
MaxCUWidth : 64 # Maximum coding unit width in pixel
MaxCUHeight : 64 # Maximum coding unit height in pixel
MaxPartitionDepth : 4 # Maximum coding unit depth
# obsoleted by TULog2MaxSize
#QuadtreeTULog2MinSize : 2 # Log2 of minimum transform size for
# # quadtree-based TU coding (2...6)
QuadtreeTUMaxDepthInter : 3
QuadtreeTUMaxDepthIntra : 3
#======== Coding Structure =============
IntraPeriod : -1 # Period of I-Frame ( -1 = only first)
......@@ -39,7 +34,7 @@ FDM : 1 # Fast Decision for Merge RD cost
#======== Quantization =============
QP : 32 # Quantization parameter(0-51)
MaxDeltaQP : 0 # CU-based multi-QP optimization
MaxCuDQPDepth : 0 # Max depth of a minimum CuDQP for sub-LCU-level delta QP
MaxCuDQPSubdiv : 0 # Maximum subdiv for CU luma Qp adjustment
DeltaQpRD : 0 # Slice-based multi-QP optimization
RDOQ : 1 # RDOQ
RDOQTS : 1 # RDOQ for transform skip
......@@ -105,7 +100,6 @@ CrQpOffset : 1
# General
CTUSize : 128
LCTUFast : 1
QuadtreeTULog2MaxSize : 6
DualITree : 1 # separate partitioning of luma and chroma channels for I-slices
MinQTLumaISlice : 8
......@@ -134,6 +128,7 @@ LumaReshapeEnable : 1 # luma reshaping. 0: disable 1:enable
# Fast tools
PBIntraFast : 1
ISPFast : 1
FastMrg : 1
AMaxBT : 1
......
......@@ -9,11 +9,6 @@ Profile : next
MaxCUWidth : 64 # Maximum coding unit width in pixel
MaxCUHeight : 64 # Maximum coding unit height in pixel
MaxPartitionDepth : 4 # Maximum coding unit depth
# obsoleted by TULog2MaxSize
#QuadtreeTULog2MinSize : 2 # Log2 of minimum transform size for
# # quadtree-based TU coding (2...6)
QuadtreeTUMaxDepthInter : 3
QuadtreeTUMaxDepthIntra : 3
#======== Coding Structure =============
IntraPeriod : -1 # Period of I-Frame ( -1 = only first)
......@@ -39,7 +34,7 @@ FDM : 1 # Fast Decision for Merge RD cost
#======== Quantization =============
QP : 32 # Quantization parameter(0-51)
MaxDeltaQP : 0 # CU-based multi-QP optimization
MaxCuDQPDepth : 0 # Max depth of a minimum CuDQP for sub-LCU-level delta QP
MaxCuDQPSubdiv : 0 # Maximum subdiv for CU luma Qp adjustment
DeltaQpRD : 0 # Slice-based multi-QP optimization
RDOQ : 1 # RDOQ
RDOQTS : 1 # RDOQ for transform skip
......@@ -105,7 +100,6 @@ CrQpOffset : 1
# General
CTUSize : 128
LCTUFast : 1
QuadtreeTULog2MaxSize : 6
DualITree : 1 # separate partitioning of luma and chroma channels for I-slices
MinQTLumaISlice : 8
......@@ -137,6 +131,7 @@ LumaReshapeEnable : 1 # luma reshaping. 0: disable 1:enable
# Fast tools
PBIntraFast : 1
ISPFast : 1
FastMrg : 1
AMaxBT : 1
......
......@@ -9,11 +9,6 @@ Profile : next
MaxCUWidth : 64 # Maximum coding unit width in pixel
MaxCUHeight : 64 # Maximum coding unit height in pixel
MaxPartitionDepth : 4 # Maximum coding unit depth
# obsoleted by TULog2MaxSize
#QuadtreeTULog2MinSize : 2 # Log2 of minimum transform size for
# # quadtree-based TU coding (2...6)
QuadtreeTUMaxDepthInter : 3
QuadtreeTUMaxDepthIntra : 3
#======== Coding Structure =============
IntraPeriod : 32 # Period of I-Frame ( -1 = only first)
......@@ -53,7 +48,7 @@ FDM : 1 # Fast Decision for Merge RD cost
#======== Quantization =============
QP : 32 # Quantization parameter(0-51)
MaxDeltaQP : 0 # CU-based multi-QP optimization
MaxCuDQPDepth : 0 # Max depth of a minimum CuDQP for sub-LCU-level delta QP
MaxCuDQPSubdiv : 0 # Maximum subdiv for CU luma Qp adjustment
DeltaQpRD : 0 # Slice-based multi-QP optimization
RDOQ : 1 # RDOQ
RDOQTS : 1 # RDOQ for transform skip
......@@ -119,7 +114,6 @@ CrQpOffset : 1
# General
CTUSize : 128
LCTUFast : 1
QuadtreeTULog2MaxSize : 6
DualITree : 1 # separate partitioning of luma and chroma channels for I-slices
MinQTLumaISlice : 8
......@@ -153,6 +147,7 @@ DMVR : 1
# Fast tools
PBIntraFast : 1
ISPFast : 1
FastMrg : 1
AMaxBT : 1
......
......@@ -111,6 +111,9 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] )
"\t1: enable bit statistic\n"
"\t2: enable tool statistic\n"
"\t3: enable bit and tool statistic\n")
#endif
#if JVET_M0445_MCTS_DEC_CHECK
("MCTSCheck", m_mctsCheck, false, "If enabled, the decoder checks for violations of mc_exact_sample_value_match_flag in Temporal MCTS ")
#endif
;
......@@ -148,6 +151,9 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] )
}
#endif
#if JVET_M0445_MCTS_DEC_CHECK
g_mctsDecCheckEnabled = m_mctsCheck;
#endif
// Chroma output bit-depth
if( m_outputBitDepth[CHANNEL_TYPE_LUMA] != 0 && m_outputBitDepth[CHANNEL_TYPE_CHROMA] == 0 )
{
......@@ -230,6 +236,9 @@ DecAppCfg::DecAppCfg()
, m_bClipOutputVideoToRec709Range(false)
, m_packedYUVMode(false)
, m_statMode(0)
#if JVET_M0445_MCTS_DEC_CHECK
, m_mctsCheck(false)
#endif
{
for (uint32_t channelTypeIndex = 0; channelTypeIndex < MAX_NUM_CHANNEL_TYPE; channelTypeIndex++)
{
......
......@@ -73,6 +73,9 @@ protected:
bool m_packedYUVMode; ///< If true, output 10-bit and 12-bit YUV data as 5-byte and 3-byte (respectively) packed YUV data
std::string m_cacheCfgFile; ///< Config file of cache model
int m_statMode; ///< Config statistic mode (0 - bit stat, 1 - tool stat, 3 - both)
#if JVET_M0445_MCTS_DEC_CHECK
bool m_mctsCheck;
#endif
public:
DecAppCfg();
......
......@@ -112,6 +112,52 @@ void EncApp::xInitLibCfg()
m_cEncLib.setConformanceWindow ( m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom );
m_cEncLib.setFramesToBeEncoded ( m_framesToBeEncoded );
//====== SPS constraint flags =======
m_cEncLib.setIntraOnlyConstraintFlag ( m_intraConstraintFlag );
m_cEncLib.setMaxBitDepthConstraintIdc ( m_bitDepthConstraint - 8 );
m_cEncLib.setMaxChromaFormatConstraintIdc ( m_chromaFormatConstraint );
m_cEncLib.setFrameConstraintFlag ( m_bFrameConstraintFlag );
m_cEncLib.setNoQtbttDualTreeIntraConstraintFlag ( !m_dualTree );
m_cEncLib.setNoSaoConstraintFlag ( !m_bUseSAO );
m_cEncLib.setNoAlfConstraintFlag ( !m_alf );
m_cEncLib.setNoPcmConstraintFlag ( !m_usePCM );
#if JVET_M0451_INTEROPERABILITY_POINT_SYNTAX
m_cEncLib.setNoRefWraparoundConstraintFlag ( m_bNoRefWraparoundConstraintFlag );
#endif
m_cEncLib.setNoTemporalMvpConstraintFlag ( m_TMVPModeId ? false : true );
m_cEncLib.setNoSbtmvpConstraintFlag ( m_SubPuMvpMode ? false : true );
m_cEncLib.setNoAmvrConstraintFlag ( m_bNoAmvrConstraintFlag );
#if JVET_M0451_INTEROPERABILITY_POINT_SYNTAX
m_cEncLib.setNoBdofConstraintFlag ( !m_BIO );
#endif
m_cEncLib.setNoCclmConstraintFlag ( m_LMChroma ? false : true );
#if JVET_M0464_UNI_MTS
#if JVET_M0303_IMPLICIT_MTS
m_cEncLib.setNoMtsConstraintFlag ( (m_MTS || m_MTSImplicit) ? false : true );
#else
m_cEncLib.setNoMtsConstraintFlag ( m_MTS ? false : true );
#endif
#else
#if JVET_M0303_IMPLICIT_MTS
m_cEncLib.setNoMtsConstraintFlag ( (m_EMT || m_MTSImplicit) ? false : true );
#else
m_cEncLib.setNoMtsConstraintFlag ( m_EMT ? false : true );
#endif
#endif
m_cEncLib.setNoAffineMotionConstraintFlag ( !m_Affine );
#if JVET_M0451_INTEROPERABILITY_POINT_SYNTAX
m_cEncLib.setNoGbiConstraintFlag ( !m_GBi );
m_cEncLib.setNoMhIntraConstraintFlag ( !m_MHIntra );
m_cEncLib.setNoTriangleConstraintFlag ( !m_Triangle );
#endif
m_cEncLib.setNoLadfConstraintFlag ( !m_LadfEnabed );
#if JVET_M0451_INTEROPERABILITY_POINT_SYNTAX
m_cEncLib.setNoCurrPicRefConstraintFlag ( !m_IBCMode );
m_cEncLib.setNoQpDeltaConstraintFlag ( m_bNoQpDeltaConstraintFlag );
#endif
m_cEncLib.setNoDepQuantConstraintFlag ( !m_depQuantEnabledFlag);
m_cEncLib.setNoSignDataHidingConstraintFlag ( !m_signDataHidingEnabledFlag );
//====== Coding Structure ========
m_cEncLib.setIntraPeriod ( m_iIntraPeriod );
m_cEncLib.setDecodingRefreshType ( m_iDecodingRefreshType );
......@@ -170,8 +216,13 @@ void EncApp::xInitLibCfg()
//====== Quality control ========
m_cEncLib.setMaxDeltaQP ( m_iMaxDeltaQP );
#if JVET_M0113_M0188_QG_SIZE
m_cEncLib.setCuQpDeltaSubdiv ( m_cuQpDeltaSubdiv );
m_cEncLib.setCuChromaQpOffsetSubdiv ( m_cuChromaQpOffsetSubdiv );
#else
m_cEncLib.setMaxCuDQPDepth ( m_iMaxCuDQPDepth );
m_cEncLib.setDiffCuChromaQpOffsetDepth ( m_diffCuChromaQpOffsetDepth );
#endif
m_cEncLib.setChromaCbQpOffset ( m_cbQpOffset );
m_cEncLib.setChromaCrQpOffset ( m_crQpOffset );
m_cEncLib.setChromaCbQpOffsetDualTree ( m_cbQpOffsetDualTree );
......@@ -293,10 +344,12 @@ void EncApp::xInitLibCfg()
m_cEncLib.setMaxCUHeight ( m_uiCTUSize );
m_cEncLib.setMaxCodingDepth ( m_uiMaxCodingDepth );
m_cEncLib.setLog2DiffMaxMinCodingBlockSize ( m_uiLog2DiffMaxMinCodingBlockSize );
m_cEncLib.setQuadtreeTULog2MaxSize ( m_quadtreeTULog2MaxSize );
m_cEncLib.setQuadtreeTULog2MinSize ( m_quadtreeTULog2MinSize );
m_cEncLib.setQuadtreeTUMaxDepthInter ( m_uiQuadtreeTUMaxDepthInter );
m_cEncLib.setQuadtreeTUMaxDepthIntra ( m_uiQuadtreeTUMaxDepthIntra );
#if MAX_TB_SIZE_SIGNALLING
m_cEncLib.setLog2MaxTbSize ( m_log2MaxTbSize );
#endif
#if JVET_M0428_ENC_DB_OPT
m_cEncLib.setUseEncDbOpt(m_encDbOpt);
#endif
m_cEncLib.setUseFastLCTU ( m_useFastLCTU );
m_cEncLib.setFastInterSearchMode ( m_fastInterSearchMode );
m_cEncLib.setUseEarlyCU ( m_bUseEarlyCU );
......@@ -329,6 +382,9 @@ void EncApp::xInitLibCfg()
m_cEncLib.setUseBLambdaForNonKeyLowDelayPictures ( m_bUseBLambdaForNonKeyLowDelayPictures );
m_cEncLib.setPCMLog2MinSize ( m_uiPCMLog2MinSize);
m_cEncLib.setUsePCM ( m_usePCM );
#if JVET_M0102_INTRA_SUBPARTITIONS
m_cEncLib.setUseFastISP ( m_useFastISP );
#endif
// set internal bit-depth and constants
for (uint32_t channelType = 0; channelType < MAX_NUM_CHANNEL_TYPE; channelType++)
......@@ -429,6 +485,9 @@ void EncApp::xInitLibCfg()
m_cEncLib.setSOPDescriptionSEIEnabled ( m_SOPDescriptionSEIEnabled );
m_cEncLib.setScalableNestingSEIEnabled ( m_scalableNestingSEIEnabled );
m_cEncLib.setTMCTSSEIEnabled ( m_tmctsSEIEnabled );
#if JVET_M0445_MCTS
m_cEncLib.setMCTSEncConstraint ( m_MCTSEncConstraint);
#endif
m_cEncLib.setTimeCodeSEIEnabled ( m_timeCodeSEIEnabled );
m_cEncLib.setNumberOfTimeSets ( m_timeCodeSEINumTs );
for(int i = 0; i < m_timeCodeSEINumTs; i++)
......@@ -850,6 +909,9 @@ void EncApp::rateStatsAccum(const AccessUnit& au, const std::vector<uint32_t>& a
#endif
case NAL_UNIT_SPS:
case NAL_UNIT_PPS:
#if JVET_M0132_APS
case NAL_UNIT_APS:
#endif
m_essentialBytes += *it_stats;
break;
default:
......@@ -866,7 +928,11 @@ void EncApp::printRateSummary()
msg( DETAILS,"Bytes written to file: %u (%.3f kbps)\n", m_totalBytes, 0.008 * m_totalBytes / time );
if (m_summaryVerboseness > 0)
{
#if JVET_M0132_APS
msg(DETAILS, "Bytes for SPS/PPS/APS/Slice (Incl. Annex B): %u (%.3f kbps)\n", m_essentialBytes, 0.008 * m_essentialBytes / time);
#else
msg( DETAILS,"Bytes for SPS/PPS/Slice (Incl. Annex B): %u (%.3f kbps)\n", m_essentialBytes, 0.008 * m_essentialBytes / time );
#endif
}
}
......
This diff is collapsed.
......@@ -117,6 +117,39 @@ protected:
bool m_bClipOutputVideoToRec709Range;
bool m_packedYUVMode; ///< If true, output 10-bit and 12-bit YUV data as 5-byte and 3-byte (respectively) packed YUV data
bool m_bIntraOnlyConstraintFlag;
uint32_t m_maxBitDepthConstraintIdc;
uint32_t m_maxChromaFormatConstraintIdc;
bool m_bFrameConstraintFlag;
bool m_bNoQtbttDualTreeIntraConstraintFlag;
bool m_bNoSaoConstraintFlag;
bool m_bNoAlfConstraintFlag;
bool m_bNoPcmConstraintFlag;
#if JVET_M0451_INTEROPERABILITY_POINT_SYNTAX
bool m_bNoRefWraparoundConstraintFlag;
#endif
bool m_bNoTemporalMvpConstraintFlag;
bool m_bNoSbtmvpConstraintFlag;
bool m_bNoAmvrConstraintFlag;
#if JVET_M0451_INTEROPERABILITY_POINT_SYNTAX
bool m_bNoBdofConstraintFlag;
#endif
bool m_bNoCclmConstraintFlag;
bool m_bNoMtsConstraintFlag;
bool m_bNoAffineMotionConstraintFlag;
#if JVET_M0451_INTEROPERABILITY_POINT_SYNTAX
bool m_bNoGbiConstraintFlag;
bool m_bNoMhIntraConstraintFlag;
bool m_bNoTriangleConstraintFlag;
#endif
bool m_bNoLadfConstraintFlag;
#if JVET_M0451_INTEROPERABILITY_POINT_SYNTAX
bool m_bNoCurrPicRefConstraintFlag;
bool m_bNoQpDeltaConstraintFlag;
#endif
bool m_bNoDepQuantConstraintFlag;
bool m_bNoSignDataHidingConstraintFlag;
// profile/level
Profile::Name m_profile;
Level::Tier m_levelTier;
......@@ -153,6 +186,9 @@ protected:
bool m_rdpcmEnabledFlag[NUMBER_OF_RDPCM_SIGNALLING_MODES];///< control flags for residual DPCM
bool m_persistentRiceAdaptationEnabledFlag; ///< control flag for Golomb-Rice parameter adaptation over each slice
bool m_cabacBypassAlignmentEnabledFlag;
#if JVET_M0102_INTRA_SUBPARTITIONS
bool m_useFastISP; ///< flag for enabling fast methods for ISP
#endif
// coding quality
#if QP_SWITCHING_FOR_PARALLEL
......@@ -169,8 +205,13 @@ protected:
int* m_aidQP; ///< array of slice QP values
int m_iMaxDeltaQP; ///< max. |delta QP|
uint32_t m_uiDeltaQpRD; ///< dQP range for multi-pass slice QP optimization
#if JVET_M0113_M0188_QG_SIZE
int m_cuQpDeltaSubdiv; ///< Maximum subdiv for CU luma Qp adjustment (0:default)
int m_cuChromaQpOffsetSubdiv; ///< If negative, then do not apply chroma qp offsets.
#else
int m_iMaxCuDQPDepth; ///< Max. depth for a minimum CuDQPSize (0:default)
int m_diffCuChromaQpOffsetDepth; ///< If negative, then do not apply chroma qp offsets.
#endif
bool m_bFastDeltaQP; ///< Fast Delta QP (false:default)
int m_cbQpOffset; ///< Chroma Cb QP Offset (0:default)
......@@ -273,6 +314,9 @@ protected:
uint32_t m_reshapeSignalType;
uint32_t m_intraCMD;
ReshapeCW m_reshapeCW;
#endif
#if JVET_M0428_ENC_DB_OPT
bool m_encDbOpt;
#endif
unsigned m_uiMaxCUWidth; ///< max. CU width in pixel
unsigned m_uiMaxCUHeight; ///< max. CU height in pixel
......@@ -294,14 +338,9 @@ protected:
int m_numWppExtraLines;
bool m_ensureWppBitEqual;
// transform unit (TU) definition
int m_quadtreeTULog2MaxSize;
int m_quadtreeTULog2MinSize;
int m_tuLog2MaxSize;
uint32_t m_uiQuadtreeTUMaxDepthInter;
uint32_t m_uiQuadtreeTUMaxDepthIntra;
#if MAX_TB_SIZE_SIGNALLING
int m_log2MaxTbSize;
#endif
// coding tools (bit-depth)
int m_inputBitDepth [MAX_NUM_CHANNEL_TYPE]; ///< bit-depth of input file
int m_outputBitDepth [MAX_NUM_CHANNEL_TYPE]; ///< bit-depth of output file
......@@ -459,6 +498,10 @@ protected:
uint32_t m_greenMetadataType;
uint32_t m_xsdMetricType;
#if JVET_M0445_MCTS
bool m_MCTSEncConstraint;
#endif
// weighted prediction
bool m_useWeightedPred; ///< Use of weighted prediction in P slices
bool m_useWeightedBiPred; ///< Use of bi-directional weighted prediction in B slices
......@@ -548,7 +591,7 @@ protected:
std::string m_decodeBitstreams[2]; ///< filename for decode bitstreams.
#if JVET_M0055_DEBUG_CTU
int m_debugCTU;
#endif
#endif
int m_switchPOC; ///< dbg poc.
int m_switchDQP; ///< switch DQP.
int m_fastForwardToPOC; ///< get to encoding the specified POC as soon as possible by skipping temporal layers irrelevant for the specified POC
......
......@@ -221,6 +221,9 @@ const char * NALU_TYPE[] =
#endif
"SPS_NUT",
"PPS_NUT",
#if JVET_M0132
"APS_NUT",
#endif
"AUD_NUT",
"EOS_NUT",
"EOB_NUT",
......
......@@ -104,7 +104,13 @@ void AdaptiveLoopFilter::ALFProcess( CodingStructure& cs, AlfSliceParam& alfSlic
{
Area blk( xPos, yPos, width, height );
deriveClassification( m_classifier, tmpYuv.get( COMPONENT_Y ), blk );
m_filter7x7Blk(m_classifier, recYuv, tmpYuv, blk, COMPONENT_Y, m_coeffFinal, m_clpRngs.comp[COMPONENT_Y]);
#if JVET_M0277_FIX_PCM_DISABLEFILTER
Area blkPCM(xPos, yPos, width, height);
resetPCMBlkClassInfo(cs, m_classifier, tmpYuv.get(COMPONENT_Y), blkPCM);
m_filter7x7Blk(m_classifier, recYuv, tmpYuv, blk, COMPONENT_Y, m_coeffFinal, m_clpRngs.comp[COMPONENT_Y], cs );
#else
m_filter7x7Blk(m_classifier, recYuv, tmpYuv, blk, COMPONENT_Y, m_coeffFinal, m_clpRngs.comp[COMPONENT_Y] );
#endif
}
for( int compIdx = 1; compIdx < MAX_NUM_COMPONENT; compIdx++ )
......@@ -117,7 +123,11 @@ void AdaptiveLoopFilter::ALFProcess( CodingStructure& cs, AlfSliceParam& alfSlic
{
Area blk( xPos >> chromaScaleX, yPos >> chromaScaleY, width >> chromaScaleX, height >> chromaScaleY );
#if JVET_M0277_FIX_PCM_DISABLEFILTER
m_filter5x5Blk( m_classifier, recYuv, tmpYuv, blk, compID, alfSliceParam.chromaCoeff, m_clpRngs.comp[compIdx], cs );
#else
m_filter5x5Blk( m_classifier, recYuv, tmpYuv, blk, compID, alfSliceParam.chromaCoeff, m_clpRngs.comp[compIdx] );
#endif
}
}
ctuIdx++;
......@@ -164,7 +174,7 @@ void AdaptiveLoopFilter::reconstructCoeff( AlfSliceParam& alfSliceParam, Channel
for( int classIdx = 0; classIdx < numClasses; classIdx++ )
{
int filterIdx = alfSliceParam.filterCoeffDeltaIdx[classIdx];
memcpy( m_coeffFinal + classIdx * MAX_NUM_ALF_LUMA_COEFF, coeff + filterIdx * MAX_NUM_ALF_LUMA_COEFF, sizeof( int16_t ) * numCoeff );
memcpy( m_coeffFinal + classIdx * MAX_NUM_ALF_LUMA_COEFF, coeff + filterIdx * MAX_NUM_ALF_LUMA_COEFF, sizeof( short ) * numCoeff );
}
if( bRedo && alfSliceParam.alfLumaCoeffDeltaPredictionFlag )
......@@ -272,6 +282,57 @@ void AdaptiveLoopFilter::deriveClassification( AlfClassifier** classifier, const
}
}
}
#if JVET_M0277_FIX_PCM_DISABLEFILTER
// Marks every 4x4 luma classification cell of `blk` that belongs to a PCM-coded
// CU with the "unused" class / transpose indices (m_ALF_UNUSED_CLASSIDX,
// m_ALF_UNUSED_TRANSPOSIDX).
//
// The function returns immediately unless the SPS PCM-filter-disable flag is
// set. `blk` is walked in tiles of m_CLASSIFICATION_BLK_SIZE, and each tile in
// 4x4 cells; the CU covering the top-left luma sample of a cell decides whether
// the whole cell is marked.
//
// \param cs          coding structure used to look up the SPS and the CUs
// \param classifier  per-sample classifier array, indexed [y][x]; modified in place
// \param srcLuma     not used by this function
// \param blk         luma area to scan
void AdaptiveLoopFilter::resetPCMBlkClassInfo(CodingStructure & cs, AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blk)
{
  if( !cs.sps->getPCMFilterDisableFlag() )
  {
    return;
  }

  const int cellH = 4;
  const int cellW = 4;

  const int yEnd = blk.pos().y + blk.height;
  const int xEnd = blk.pos().x + blk.width;

  // local copies of the marker values handed to the AlfClassifier constructor
  const int unusedClassIdx     = m_ALF_UNUSED_CLASSIDX;
  const int unusedTransposeIdx = m_ALF_UNUSED_TRANSPOSIDX;

  for( int tileY = blk.pos().y; tileY < yEnd; tileY += m_CLASSIFICATION_BLK_SIZE )
  {
    // the last tile row may be clipped by the block boundary
    const int tileYEnd = std::min( tileY + m_CLASSIFICATION_BLK_SIZE, yEnd );

    for( int tileX = blk.pos().x; tileX < xEnd; tileX += m_CLASSIFICATION_BLK_SIZE )
    {
      const int tileXEnd = std::min( tileX + m_CLASSIFICATION_BLK_SIZE, xEnd );

      for( int cellY = tileY; cellY < tileYEnd; cellY += cellH )
      {
        for( int cellX = tileX; cellX < tileXEnd; cellX += cellW )
        {
          Position cellPos( cellX, cellY );
          const CodingUnit* cu = cs.getCU( cellPos, CH_L );

          if( !cu->ipcm )
          {
            continue;
          }

          // overwrite all 16 classifier entries of this 4x4 cell
          const AlfClassifier unusedEntry( unusedClassIdx, unusedTransposeIdx );
          for( int row = 0; row < 4; row++ )
          {
            AlfClassifier* line = classifier[cellY + row] + cellX;
            for( int col = 0; col < 4; col++ )
            {
              line[col] = unusedEntry;
            }
          }
        }
      }
    }
  }
}
#endif
void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift )
{
......@@ -445,13 +506,23 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in
}
template<AlfFilterType filtType>
#if JVET_M0277_FIX_PCM_DISABLEFILTER
void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs )
#else
void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng )
#endif
{
const bool bChroma = isChroma( compId );
if( bChroma )
{
CHECK( filtType != 0, "Chroma needs to have filtType == 0" );
}
#if JVET_M0277_FIX_PCM_DISABLEFILTER
const SPS* sps = cs.slice->getSPS();
bool isDualTree =CS::isDualITree(cs);
bool isPCMFilterDisabled = sps->getPCMFilterDisableFlag();
ChromaFormat nChromaFormat = sps->getChromaFormatIdc();
#endif
const CPelBuf srcLuma = recSrc.get( compId );
PelBuf dstLuma = recDst.get( compId );
......@@ -479,6 +550,8 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
int transposeIdx = 0;
const int clsSizeY = 4;
const int clsSizeX = 4;
bool pcmFlags2x2[4] = {0,0,0,0};
CHECK( startHeight % clsSizeY, "Wrong startHeight in filtering" );
CHECK( startWidth % clsSizeX, "Wrong startWidth in filtering" );
......@@ -516,8 +589,40 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
{
AlfClassifier& cl = pClass[j];
transposeIdx = cl.transposeIdx;
#if JVET_M0277_FIX_PCM_DISABLEFILTER
if( isPCMFilterDisabled && cl.classIdx== m_ALF_UNUSED_CLASSIDX && transposeIdx== m_ALF_UNUSED_TRANSPOSIDX )
{
continue;
}
#endif
coef = filterSet + cl.classIdx * MAX_NUM_ALF_LUMA_COEFF;
}
#if JVET_M0277_FIX_PCM_DISABLEFILTER
else if( isPCMFilterDisabled )
{
int blkX, blkY;
bool *flags = pcmFlags2x2;
// check which chroma 2x2 blocks use PCM
// chroma PCM may not be aligned with 4x4 ALF processing grid
for( blkY=0; blkY<4; blkY+=2 )
{
for( blkX=0; blkX<4; blkX+=2 )
{
Position pos(j+startWidth+blkX, i+startHeight+blkY);
CodingUnit* cu = isDualTree ? cs.getCU(pos, CH_C) : cs.getCU(recalcPosition(nChromaFormat, CH_C, CH_L, pos), CH_L);
*flags++ = cu->ipcm ? 1 : 0;
}
}
// skip entire 4x4 if all chroma 2x2 blocks use PCM
if( pcmFlags2x2[0] && pcmFlags2x2[1] && pcmFlags2x2[2] && pcmFlags2x2[3] )
{
continue;
}
}
#endif
if( filtType == ALF_FILTER_7 )
{
......@@ -572,6 +677,23 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
for( int jj = 0; jj < clsSizeX; jj++ )
{
// skip 2x2 PCM chroma blocks
if( bChroma && isPCMFilterDisabled )
{
if( pcmFlags2x2[2*(ii>>1) + (jj>>1)] )
{
pImg0++;
pImg1++;
pImg2++;
pImg3++;
pImg4++;
pImg5++;
pImg6++;
continue;
}
}
int sum = 0;
if( filtType == ALF_FILTER_7 )
{
......
......@@ -41,7 +41,9 @@
#include "CommonDef.h"
#include "Unit.h"
#if JVET_M0277_FIX_PCM_DISABLEFILTER
#include "UnitTools.h"
#endif
struct AlfClassifier
{
AlfClassifier() {}
......@@ -68,6 +70,10 @@ class AdaptiveLoopFilter
public:
static constexpr int m_NUM_BITS = 8;
static constexpr int m_CLASSIFICATION_BLK_SIZE = 32; //non-normative, local buffer size
#if JVET_M0277_FIX_PCM_DISABLEFILTER
static constexpr int m_ALF_UNUSED_CLASSIDX = 255;
static constexpr int m_ALF_UNUSED_TRANSPOSIDX = 255;
#endif
AdaptiveLoopFilter();
virtual ~AdaptiveLoopFilter() {}
......@@ -78,17 +84,28 @@ public:
void destroy();
static void deriveClassificationBlk( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift );
void deriveClassification( AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blk );
#if JVET_M0277_FIX_PCM_DISABLEFILTER
void resetPCMBlkClassInfo(CodingStructure & cs, AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blk);
#endif
template<AlfFilterType filtType>
#if JVET_M0277_FIX_PCM_DISABLEFILTER
static void filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs );
#else
static void filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng );
#endif
// Returns 2 for ALF_FILTER_5 and 3 for every other filter type.
inline static int getMaxGolombIdx( AlfFilterType filterType )
{
  if( filterType == ALF_FILTER_5 )
  {
    return 2;
  }
  return 3;
}
void( *m_deriveClassificationBlk )( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift );
#if JVET_M0277_FIX_PCM_DISABLEFILTER
void( *m_filter5x5Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs );
void( *m_filter7x7Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs );
#else
void( *m_filter5x5Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng );
void( *m_filter7x7Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng );
#endif
#ifdef TARGET_SIMD_X86
void initAdaptiveLoopFilterX86();
......
......@@ -460,32 +460,6 @@ void AreaBuf<Pel>::scaleSignal(const int scale, const bool dir, const ClpRng& cl
}
}
}
// Explicit specialisation for Pel: returns the rounded average of the samples
// accumulated by SIZE_AWARE_PER_EL_OP, i.e. (sum + area/2) / area.
// NOTE(review): SIZE_AWARE_PER_EL_OP is defined outside this view; presumably it
// applies AVG_OP to every sample of a row and AVG_INC between rows - confirm.
template<>
Pel AreaBuf <Pel> ::computeAvg() const
{
// read cursor into the buffer; advanced by `stride` through AVG_INC
const Pel* src = buf;
#if !JVET_M0102_INTRA_SUBPARTITIONS
// when JVET_M0102_INTRA_SUBPARTITIONS is off, 1-sample-wide blocks are rejected
if (width == 1)
{
THROW("Blocks of width = 1 not supported");
}
else
{
#endif
// 32-bit accumulator for the sample sum
int32_t acc = 0;
// AVG_INC: step the cursor by one stride (note: the expansion carries its own ';')
#define AVG_INC \
src += stride;
// AVG_OP: add the sample at offset ADDR from the cursor to the accumulator
#define AVG_OP(ADDR) acc += src[ADDR]
SIZE_AWARE_PER_EL_OP(AVG_OP, AVG_INC);
#undef AVG_INC
#undef AVG_OP
// adding half the area before dividing rounds the quotient instead of truncating it
return Pel((acc + (area() >> 1)) / area());
#if !JVET_M0102_INTRA_SUBPARTITIONS
}
#endif
}
#endif
template<>
......
......@@ -145,8 +145,9 @@ struct AreaBuf : public Size
#if JVET_M0427_INLOOP_RESHAPER
void rspSignal ( std::vector<Pel>& pLUT );
void scaleSignal ( const int scale, const bool dir , const ClpRng& clpRng);
T computeAvg ( ) const;
#endif
T computeAvg ( ) const;
T& at( const int &x, const int &y ) { return buf[y * stride + x]; }
const T& at( const int &x, const int &y ) const { return buf[y * stride + x]; }
......@@ -566,6 +567,7 @@ void AreaBuf<T>::extendBorderPel( unsigned margin )
::memcpy( p - ( y + 1 ) * s, p, sizeof( T ) * ( w + ( margin << 1 ) ) );
}
}
template<typename T>
T AreaBuf<T>::meanDiff( const AreaBuf<const T> &other ) const
{
......@@ -629,6 +631,33 @@ void AreaBuf<T>::transposedFrom( const AreaBuf<const T> &other )
}
}
// Generic version: returns the rounded average of the samples accumulated by
// SIZE_AWARE_PER_EL_OP, i.e. (sum + area/2) / area, converted to T.
// NOTE(review): SIZE_AWARE_PER_EL_OP is defined outside this view; presumably it
// applies AVG_OP to every sample of a row and AVG_INC between rows - confirm.
template<typename T>
T AreaBuf <T> ::computeAvg() const
{
#if !JVET_M0102_INTRA_SUBPARTITIONS
// when JVET_M0102_INTRA_SUBPARTITIONS is off, 1-sample-wide blocks are rejected
if (width == 1)
{
THROW("Blocks of width = 1 not supported");
}
else
{
#endif
// read cursor into the buffer; advanced by `stride` through AVG_INC
const T* src = buf;
// the accumulator is widened to 64 bit when ENABLE_QPA is set, because the
// average is then also taken over whole pictures (see the comment below)
#if ENABLE_QPA
int64_t acc = 0; // for picture-wise use in getGlaringColorQPOffset() and applyQPAdaptationChroma()
#else
int32_t acc = 0;
#endif
// AVG_INC: step the cursor by one stride
#define AVG_INC src += stride
// AVG_OP: add the sample at offset ADDR from the cursor to the accumulator
#define AVG_OP(ADDR) acc += src[ADDR]
SIZE_AWARE_PER_EL_OP(AVG_OP, AVG_INC);
#undef AVG_INC
#undef AVG_OP
// adding half the area before dividing rounds the quotient instead of truncating it
return T ((acc + (area() >> 1)) / area());
#if !JVET_M0102_INTRA_SUBPARTITIONS
}
#endif
}
#ifndef DONT_UNDEF_SIZE_AWARE_PER_EL_OP
#undef SIZE_AWARE_PER_EL_OP
......
......@@ -3,7 +3,7 @@
* and contributor rights, including patent rights, and no such rights are
* granted under this license.
*
* Copyright (c) 2010 - 2018, ITU/ISO/IEC
* Copyright (c) 2010 - 2019, ITU/ISO/IEC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
......@@ -55,6 +55,13 @@
#define JVET_J0090_MEMORY_BANDWITH_MEASURE_PRINT_FRAME -1
#endif
// Address-mapping modes of the cache model; the configured value is stored in
// m_cacheAddrMode (option "CacheAddrMode") and dispatched on in xMapAddress().
enum CacheAddressMap
{
CACHE_MODE_1D = 0, // linear address: the offset is used unchanged
CACHE_MODE_2D, // 2D address: offsets are regrouped into BlkWidth x BlkHeight blocks
MAX_NUM_CACHE_MODE // presumably the number of modes; not referenced in the visible code
};
namespace po = df::program_options_lite;
void* cache_mem_align_malloc(int size, int alignSize)
......@@ -73,7 +80,10 @@ void* cache_mem_align_malloc(int size, int alignSize)
// Releases a block handed out by the aligned allocator.
//
// The pointer that is actually passed to free() is read from the slot located
// immediately before `ptr` (presumably the raw malloc() result stored there by
// cache_mem_align_malloc - its body is outside this view).
//
// A null `ptr` is a no-op. The block is freed exactly once: an additional
// unconditional free() ahead of the null check would dereference a null
// pointer for ptr == nullptr and double-free every valid block.
void cache_mem_align_free(void *ptr)
{
  if ( ptr == nullptr )
  {
    return;
  }
  free( *( static_cast<void **>( ptr ) - 1 ) );
}
CacheModel::CacheModel()
......@@ -133,6 +143,9 @@ void CacheModel::xConfigure(const std::string& filename )
("CacheLineSize", m_cacheLineSize, 128, "Cache line size")
("NumCacheLine", m_numCacheLine, 32, "Number of cache line")
("NumWay", m_numWay, 4, "Number of way")
("CacheAddrMode", m_cacheAddrMode, 0, "Address mapping mode 0 : linear address 1 : 2D address")
("BlkWidth", m_cacheBlkWidth, 32, "Block width in 2D address mode")
("BlkHeight", m_cacheBlkHeight, 16, "Block height in 2D address mode")
("FrameReport", m_frameReport, false, "Report in each frame" )
;
......@@ -144,6 +157,13 @@ void CacheModel::xConfigure(const std::string& filename )
fprintf( stderr, "cache line size is bigger that memory alignment\n" );
fprintf( stderr, "This may lead mismatch among enviroments\n" );
}
if ( m_cacheAddrMode == CACHE_MODE_2D )
{
int blkSize = m_cacheBlkWidth * m_cacheBlkHeight;
if ( m_cacheLineSize % blkSize != 0 && blkSize % m_cacheLineSize ) {
THROW("CacheLineSize shall be multiple of BlkWidth x BlkHeight or BlkWidth x BlkHeight shall be multiple of CacheLineSize in 2D mode");
}
}
}
// initialize cache information such as size
......@@ -248,18 +268,22 @@ void CacheModel::accumulateFrame( )
// Report hit ratio and required bandwidth for the current frame and advance
// the frame counter.
//
// Nothing happens while the cache model is disabled (m_cacheEnable == false).
// With the cache enabled, the statistics are printed to stdout only when
// m_frameReport is set, whereas m_frameCount is incremented once per call
// regardless of whether a report was printed.
void CacheModel::reportFrame( )
{
  if ( !m_cacheEnable )
  {
    return;
  }

  if ( m_frameReport )
  {
    // sum the hit counters of all m_cacheSize cache entries
    int hitCount = 0;
    for ( int i = 0 ; i < m_cacheSize ; i++ )
    {
      hitCount += m_hitCount[ i ];
    }

    fprintf( stdout, "Cache Statics in frame %d\n", m_frameCount );
    fprintf( stdout, "Hit ratio %5.2f [%%]\n", (100 * (double)(hitCount)) / m_totalAccess );
    // bandwidth = number of misses * cache line size, scaled by 1024 * 1024
    fprintf( stdout, "Required bandwidth %.1f [MB]\n", ((double)(m_missHitCount) * m_cacheLineSize) / (1024 * 1024) );
  }

  m_frameCount++;
}
......@@ -288,6 +312,7 @@ void CacheModel::setRefPicture( const Picture *refPic, const ComponentID CompID
m_refPoc = refPic->getPOC();
m_base = refPic->getOrigin( PIC_RECONSTRUCTION, CompID );
m_compID = CompID;
m_picWidth = refPic->getRecoBuf( CompID ).stride;
}
bool CacheModel::xIsCacheHit( int pos, size_t addr )
......@@ -382,6 +407,31 @@ void CacheModel::xUpdateCacheStatus( int entry, int way )
xUpdatePLRUStatus( entry, way );
}
// Maps a linear sample offset to the address used for the cache lookup,
// depending on m_cacheAddrMode:
//  - CACHE_MODE_1D: direct mapping, the offset is returned unchanged.
//  - CACHE_MODE_2D: the offset is split into (x, y) with m_picWidth as row
//    length and re-linearised block by block, using blocks of
//    m_cacheBlkWidth x m_cacheBlkHeight samples.
// Any other mode raises through THROW.
size_t CacheModel::xMapAddress( size_t offset )
{
  if ( m_cacheAddrMode == CACHE_MODE_1D )
  {
    return offset;
  }

  if ( m_cacheAddrMode != CACHE_MODE_2D )
  {
    THROW( "Unknown address mode " << m_cacheAddrMode );
    return 0;
  }

  // position of the sample inside the picture
  const size_t picX = offset % m_picWidth;
  const size_t picY = offset / m_picWidth;

  // block containing the sample, and the sample position inside that block
  const size_t blkCol  = picX / m_cacheBlkWidth;
  const size_t blkRow  = picY / m_cacheBlkHeight;
  const size_t inBlkX  = picX % m_cacheBlkWidth;
  const size_t inBlkY  = picY % m_cacheBlkHeight;

  // start of the block row, then start of the block, then the row inside the block
  size_t mapped = m_picWidth * blkRow * m_cacheBlkHeight;
  mapped += blkCol * m_cacheBlkWidth * m_cacheBlkHeight;
  mapped += inBlkY * m_cacheBlkWidth;
  mapped += inBlkX;
  return mapped;
}
// check cache hit/miss
void CacheModel::cacheAccess( const Pel *addr, const std::string& fileName, const int lineNum )
......@@ -391,7 +441,7 @@ void CacheModel::cacheAccess( const Pel *addr, const std::string& fileName, cons
return;
}
bool hit = false;
size_t cacheAddr = ((size_t) (addr - m_base)) >> m_shift;
size_t cacheAddr = xMapAddress( (size_t) (addr - m_base) ) >> m_shift;
int entry = (int) (cacheAddr % m_numCacheLine);
int pos = entry * m_numWay;
int way;
......
......@@ -65,6 +65,9 @@ private:
int m_numCacheLine; // # of cache line
int m_numWay; // # of way
int m_cacheSize; // total entry number (line number * way)
int m_cacheAddrMode; // cache address mode
int m_cacheBlkWidth; // block width in 2D access
int m_cacheBlkHeight; // block height in 2D access
// cache parameters for address calc
int m_shift;
// cache entry
......@@ -76,6 +79,7 @@ private:
int m_refPoc;
Pel* m_base;
ComponentID m_compID;
int m_picWidth;
// PLRU parameters
int m_treeDepth;
int* m_treeStatus;
......@@ -109,6 +113,7 @@ protected:
int xCalcTreeSize( int way );
int xCalcPower( int num );
int xGetWay( int entry );
size_t xMapAddress( size_t offset );
void xConfigure(const std::string& filename);
void xUpdateCache( int entry, size_t addr );
void xUpdateCacheStatus( int entry, int way );
......
......@@ -708,6 +708,31 @@ void CodingStructure::createInternals( const UnitArea& _unit, const bool isTopLa
initStructData();
}
/// Append a motion candidate to a look-up table, keeping the entries unique and bounded.
///
/// \param lut  table that is updated in place
/// \param mi   motion information to append at the back
///
/// If an entry equal to mi is already stored, that entry is removed first. Otherwise, if the
/// table is filled up to its capacity, the entry at the front is removed. In both cases mi ends
/// up as the last entry.
void CodingStructure::addMiToLut(static_vector<MotionInfo, MAX_NUM_HMVP_CANDS> &lut, const MotionInfo &mi)
{
  const size_t numStored = lut.size();

  // Index of the entry to drop; stays at the front entry unless a duplicate is found.
  size_t dropIdx   = 0;
  bool   duplicate = false;

  for (size_t i = 0; i < numStored && !duplicate; i++)
  {
    if (lut[i] == mi)
    {
      dropIdx   = i;
      duplicate = true;
    }
  }

  const bool tableFull = ( numStored == lut.capacity() );
  if (duplicate || tableFull)
  {
    lut.erase(lut.begin() + dropIdx);
  }

  lut.push_back(mi);
}
void CodingStructure::rebindPicBufs()
{
CHECK( parent, "rebindPicBufs can only be used for the top level CodingStructure" );
......@@ -751,6 +776,11 @@ void CodingStructure::initSubStructure( CodingStructure& subStruct, const Channe
{
CHECK( this == &subStruct, "Trying to init self as sub-structure" );
#if JVET_M0428_ENC_DB_OPT
subStruct.useDbCost = false;
subStruct.costDbOffset = 0;
#endif
for( uint32_t i = 0; i < subStruct.area.blocks.size(); i++ )
{
CHECKD( subStruct.area.blocks[i].size() != subArea.blocks[i].size(), "Trying to init sub-structure of incompatible size" );
......@@ -772,6 +802,9 @@ void CodingStructure::initSubStructure( CodingStructure& subStruct, const Channe
subStruct.vps = vps;
#endif
subStruct.pps = pps;
#if JVET_M0132_APS
subStruct.aps = aps;
#endif
subStruct.slice = slice;
subStruct.baseQP = baseQP;
subStruct.prevQP[_chType]
......@@ -780,6 +813,8 @@ void CodingStructure::initSubStructure( CodingStructure& subStruct, const Channe
subStruct.m_isTuEnc = isTuEnc;
subStruct.motionLut = motionLut;
subStruct.initStructData( currQP[_chType], isLossless );
if( isTuEnc )
......@@ -842,6 +877,8 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C
CMotionBuf subMB = subStruct.getMotionBuf( clippedArea );
ownMB.copyFrom( subMB );
motionLut = subStruct.motionLut;
}
#if ENABLE_WPP_PARALLELISM
......@@ -852,7 +889,9 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C
fracBits += subStruct.fracBits;
dist += subStruct.dist;
cost += subStruct.cost;
#if JVET_M0428_ENC_DB_OPT
costDbOffset += subStruct.costDbOffset;
#endif
if( parent )
{
// allow this to be false at the top level
......@@ -916,7 +955,9 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C
fracBits += subStruct.fracBits;
dist += subStruct.dist;
cost += subStruct.cost;
#if JVET_M0428_ENC_DB_OPT
costDbOffset += subStruct.costDbOffset;
#endif
if( parent )
{
// allow this to be false at the top level
......@@ -978,7 +1019,9 @@ void CodingStructure::copyStructure( const CodingStructure& other, const Channel
fracBits = other.fracBits;
dist = other.dist;
cost = other.cost;
#if JVET_M0428_ENC_DB_OPT
costDbOffset = other.costDbOffset;
#endif
CHECKD( area != other.area, "Incompatible sizes" );
const UnitArea dualITreeArea = CS::getArea( *this, this->area, chType );
......@@ -1026,6 +1069,8 @@ void CodingStructure::copyStructure( const CodingStructure& other, const Channel
CMotionBuf subMB = other.getMotionBuf();
ownMB.copyFrom( subMB );
motionLut = other.motionLut;
}
if( copyTUs )
......@@ -1110,6 +1155,10 @@ void CodingStructure::initStructData( const int &QP, const bool &_isLosses, cons
cost = MAX_DOUBLE;
#if JVET_M0102_INTRA_SUBPARTITIONS
lumaCost = MAX_DOUBLE;
#endif
#if JVET_M0428_ENC_DB_OPT
costDbOffset = 0;
useDbCost = false;
#endif
interHad = std::numeric_limits<Distortion>::max();
}
......
......@@ -101,6 +101,9 @@ public:
bool isLossless;
const SPS *sps;
const PPS *pps;
#if JVET_M0132_APS
APS * aps;
#endif
#if HEVC_VPS
const VPS *vps;
#endif
......@@ -179,6 +182,10 @@ public:
static_vector<double, NUM_ENC_FEATURES> features;
double cost;
#if JVET_M0428_ENC_DB_OPT
bool useDbCost;
double costDbOffset;
#endif
#if JVET_M0102_INTRA_SUBPARTITIONS
double lumaCost;
#endif
......@@ -207,6 +214,10 @@ public:
std::vector<PredictionUnit*> pus;
std::vector< TransformUnit*> tus;
LutMotionCand motionLut;
void addMiToLut(static_vector<MotionInfo, MAX_NUM_HMVP_CANDS>& lut, const MotionInfo &mi);
private:
// needed for TU encoding
......
......@@ -146,13 +146,19 @@ static const int NOT_VALID = -1;
static const int AMVP_MAX_NUM_CANDS = 2; ///< AMVP: advanced motion vector prediction - max number of final candidates
static const int AMVP_MAX_NUM_CANDS_MEM = 3; ///< AMVP: advanced motion vector prediction - max number of candidates
static const int AMVP_DECIMATION_FACTOR = 2;
static const int MRG_MAX_NUM_CANDS = 7; ///< MERGE
static const int MRG_MAX_NUM_CANDS = 6; ///< MERGE
static const int AFFINE_MRG_MAX_NUM_CANDS = 5; ///< AFFINE MERGE
static const int MAX_TLAYER = 7; ///< Explicit temporal layer QP offset - max number of temporal layer
static const int ADAPT_SR_SCALE = 1; ///< division factor for adaptive search range
static const int MIN_TB_LOG2_SIZEY = 2;
static const int MAX_TB_LOG2_SIZEY = 6;
static const int MIN_TB_SIZEY = 1 << MIN_TB_LOG2_SIZEY;
static const int MAX_TB_SIZEY = 1 << MAX_TB_LOG2_SIZEY;
static const int MAX_NUM_PICS_IN_SOP = 1024;
static const int MAX_NESTING_NUM_OPS = 1024;
......@@ -186,6 +192,9 @@ static const int C2FLAG_NUMBER = 1; ///< maxi
static const int MAX_NUM_VPS = 16;
static const int MAX_NUM_SPS = 16;
static const int MAX_NUM_PPS = 64;
#if JVET_M0132_APS
static const int MAX_NUM_APS = 32; //Currently APS ID has 5 bits
#endif
static const int MLS_GRP_NUM = 1024; ///< Max number of coefficient groups, max(16, 256)
......@@ -274,12 +283,7 @@ static const int MAX_CU_DEPTH = 7; ///< log2
static const int MAX_CU_SIZE = 1<<MAX_CU_DEPTH;
static const int MIN_CU_LOG2 = 2;
static const int MIN_PU_SIZE = 4;
static const int MIN_TU_SIZE = 4;
static const int MAX_TU_SIZE = 128;
static const int MAX_LOG2_TU_SIZE_PLUS_ONE = 8; ///< log2(MAX_TU_SIZE) + 1
static const int MAX_NUM_PARTS_IN_CTU = ( ( MAX_CU_SIZE * MAX_CU_SIZE ) >> ( MIN_CU_LOG2 << 1 ) );
static const int MAX_TR_SIZE = MAX_CU_SIZE;
static const int MAX_TU_SIZE_FOR_PROFILE = 64;
static const int MAX_LOG2_DIFF_CU_TR_SIZE = 2;
static const int MAX_CU_TILING_PARTITIONS = 1 << ( MAX_LOG2_DIFF_CU_TR_SIZE << 1 );
......@@ -326,7 +330,7 @@ static const int BIO_TEMP_BUFFER_SIZE = (MAX_CU_SIZE
static const int GBI_NUM = 5; ///< the number of weight options
static const int GBI_DEFAULT = ((uint8_t)(GBI_NUM >> 1)); ///< Default weighting index representing for w=0.5
static const int GBI_SIZE_CONSTRAINT = 256; ///< disabling GBi if cu size is smaller than 256
static const int MAX_NUM_HMVP_CANDS = 5; ///< maximum number of HMVP candidates to be stored and used in merge list
static const int MAX_NUM_HMVP_CANDS = (MRG_MAX_NUM_CANDS-1); ///< maximum number of HMVP candidates to be stored and used in merge list
static const int MAX_NUM_HMVP_AVMPCANDS = 4; ///< maximum number of HMVP candidates to be used in AMVP list
#if W0038_DB_OPT
......
......@@ -67,7 +67,6 @@ CoeffCodingContext::CoeffCodingContext(const TransformUnit& tu, ComponentID comp
#endif
, m_log2BlockWidth (g_aucLog2[m_width])
, m_log2BlockHeight (g_aucLog2[m_height])
, m_log2BlockSize ((m_log2BlockWidth + m_log2BlockHeight)>>1)
, m_maxNumCoeff (m_width * m_height)
#if HEVC_USE_SIGN_HIDING
, m_signHiding (signHide)
......
......@@ -75,9 +75,6 @@ public:
unsigned log2CGHeight () const { return m_log2CGHeight; }
#endif
unsigned log2CGSize () const { return m_log2CGSize; }
unsigned log2BlockWidth () const { return m_log2BlockWidth; }
unsigned log2BlockHeight () const { return m_log2BlockHeight; }
unsigned log2BlockSize () const { return m_log2BlockSize; }
bool extPrec () const { return m_extendedPrecision; }
int maxLog2TrDRange () const { return m_maxLog2TrDynamicRange; }
unsigned maxNumCoeff () const { return m_maxNumCoeff; }
......@@ -209,7 +206,6 @@ private:
const unsigned m_heightInGroups;
const unsigned m_log2BlockWidth;
const unsigned m_log2BlockHeight;
const unsigned m_log2BlockSize;
const unsigned m_maxNumCoeff;
#if HEVC_USE_SIGN_HIDING
const bool m_signHiding;
......@@ -253,13 +249,22 @@ class CUCtx
{
// Small per-CU state record: flags telling whether a delta QP / chroma QP adjustment has been
// coded, a counter of non-zero coefficients, and a QP value.
public:
// Default constructor: clears the flags and the coefficient counter.
// NOTE(review): qp is not part of this initialiser list, so it is left uninitialised here.
CUCtx() : isDQPCoded(false), isChromaQpAdjCoded(false),
#if JVET_M0113_M0188_QG_SIZE
qgStart(false),
#endif
numNonZeroCoeffNonTs(0) {}
// Same as the default constructor, but additionally sets qp from _qp
// (int argument stored into an int8_t member).
CUCtx(int _qp) : isDQPCoded(false), isChromaQpAdjCoded(false),
#if JVET_M0113_M0188_QG_SIZE
qgStart(false),
#endif
numNonZeroCoeffNonTs(0), qp(_qp) {}
~CUCtx() {}
public:
bool isDQPCoded;          // set to false on construction
bool isChromaQpAdjCoded;  // set to false on construction
#if JVET_M0113_M0188_QG_SIZE
bool qgStart;             // presumably marks the start of a quantization group - TODO confirm
#endif
uint32_t numNonZeroCoeffNonTs; // starts at 0; presumably counts non-zero coefficients of non-transform-skip blocks - TODO confirm
int8_t qp; // used as a previous(last) QP and for QP prediction
};
......
......@@ -520,106 +520,4 @@ private:
/// Buffer holding one snapshot of the states exchanged through Ctx::savePStates() and
/// Ctx::loadPStates(), together with a flag telling whether a snapshot is present.
class CtxStateBuf
{
public:
  CtxStateBuf() : m_valid( false ) {}
  ~CtxStateBuf() {}

  /// Mark the buffer as empty; the stored states are not touched, only the flag is cleared.
  void reset()
  {
    m_valid = false;
  }

  /// Load the stored snapshot into ctx.
  /// \return true if a snapshot was present and has been loaded, false otherwise (ctx untouched)
  bool getIfValid( Ctx& ctx ) const
  {
    if( !m_valid )
    {
      return false;
    }
    ctx.loadPStates( m_states );
    return true;
  }

  /// Save the states of ctx into the buffer and mark the buffer as valid.
  void store( const Ctx& ctx )
  {
    ctx.savePStates( m_states );
    m_valid = true;
  }

private:
  std::vector<uint16_t> m_states; ///< snapshot storage
  bool                  m_valid;  ///< true once store() has been called and until reset()
};
class CtxStateArray
{
public:
CtxStateArray () {}
~CtxStateArray() {}
inline void resetAll()
{
for (std::size_t k = 0; k < m_data.size(); k++)
{
m_data[k].reset();
}
}
inline void resize(std::size_t reqSize)
{
if (m_data.size() < reqSize)
{
m_data.resize(reqSize);
}
}
inline bool getIfValid(Ctx &ctx, unsigned id) const
{
if (id < m_data.size())
{
return m_data[id].getIfValid(ctx);
}
return false;
}
inline void store(const Ctx &ctx, unsigned id)
{
if (id >= m_data.size())
{
resize(id + 1);
}
m_data[id].store(ctx);
}
private:
std::vector<CtxStateBuf> m_data;
};
/// Set of log2 window sizes with three status flags (valid / changes / coded).
class CtxWSizeSet
{
public:
  CtxWSizeSet() : m_valid( false ), m_changes( false ), m_coded( false ) {}

  bool isValid() const
  {
    return m_valid;
  }

  /// Read-only and writable access to the underlying window size buffer.
  const std::vector<uint8_t>& getWinSizeBuffer() const { return m_log2WinSizes; }
  std::vector<uint8_t>&       getWinSizeBuffer()       { return m_log2WinSizes; }

  /// \return 0 if the set is not valid or holds no changes, otherwise 2 if coded and 1 if not
  int getMode() const
  {
    if( !( m_valid && m_changes ) )
    {
      return 0;
    }
    return m_coded ? 2 : 1;
  }

  /// Clear all three status flags; the buffer content is kept.
  void setInvalid()
  {
    m_valid   = false;
    m_changes = false;
    m_coded   = false;
  }

  void setCoded()     { m_coded = true; }
  void setValidOnly() { m_valid = true; }

  /// Mark the set as valid and recompute the "changes" flag: it is set when at least one
  /// buffer entry is non-zero and differs from defSize. The "coded" flag is left as it is.
  void setValid( uint8_t defSize )
  {
    m_valid   = true;
    m_changes = false;
    for( const uint8_t winSize : m_log2WinSizes )
    {
      if( winSize != 0 && winSize != defSize )
      {
        m_changes = true;
        break;
      }
    }
  }

private:
  bool                 m_valid;
  bool                 m_changes;
  bool                 m_coded;
  std::vector<uint8_t> m_log2WinSizes;
};
#endif
......@@ -548,8 +548,8 @@ namespace DQIntern
private:
const ScanElement * m_scanId2Pos;
int32_t m_lastBitsX [ MAX_TU_SIZE ];
int32_t m_lastBitsY [ MAX_TU_SIZE ];
int32_t m_lastBitsX [ MAX_TB_SIZEY ];
int32_t m_lastBitsY [ MAX_TB_SIZEY ];
BinFracBits m_sigSbbFracBits [ sm_maxNumSigSbbCtx ];
BinFracBits m_sigFracBits [ sm_numCtxSetsSig ][ sm_maxNumSigCtx ];
CoeffFracBits m_gtxFracBits [ sm_maxNumGtxCtx ];
......@@ -1001,7 +1001,7 @@ namespace DQIntern
SbbCtx m_allSbbCtx [8];
SbbCtx* m_currSbbCtx;
SbbCtx* m_prevSbbCtx;
uint8_t m_memory[ 8 * ( MAX_TU_SIZE * MAX_TU_SIZE + MLS_GRP_NUM ) ];
uint8_t m_memory[ 8 * ( MAX_TB_SIZEY * MAX_TB_SIZEY + MLS_GRP_NUM ) ];
};
#define RICEMAX 32
......@@ -1339,7 +1339,7 @@ namespace DQIntern
UPDATE(4);
}
#undef UPDATE
sumAbs = std::min(31, sumAbs);
sumAbs = std::min<TCoeff>(31, sumAbs);
m_goRicePar = g_auiGoRiceParsCoeff[sumAbs];
m_goRiceZero = m_goRiceZeroArray[sumAbs];
}
......@@ -1499,7 +1499,7 @@ namespace DQIntern
State* m_skipStates;
State m_startState;
Quantizer m_quant;
Decision m_trellis[ MAX_TU_SIZE * MAX_TU_SIZE ][ 8 ];
Decision m_trellis[ MAX_TB_SIZEY * MAX_TB_SIZEY ][ 8 ];
};
......
......@@ -39,6 +39,9 @@
#include "Buffer.h"
#include "UnitTools.h"
#if JVET_M0445_MCTS
#include "MCTS.h"
#endif
#include <memory.h>
#include <algorithm>
......@@ -201,12 +204,15 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
}
#if JVET_M0147_DMVR
m_cYuvPredTempDMVRL0 = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)));
m_cYuvPredTempDMVRL1 = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)));
for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++)
if (m_cYuvPredTempDMVRL0 == nullptr && m_cYuvPredTempDMVRL1 == nullptr)
{
m_cRefSamplesDMVRL0[ch] = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA));
m_cRefSamplesDMVRL1[ch] = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA));
m_cYuvPredTempDMVRL0 = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)));
m_cYuvPredTempDMVRL1 = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION)));
for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++)
{
m_cRefSamplesDMVRL0[ch] = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA) * (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + NTAPS_LUMA));
m_cRefSamplesDMVRL1[ch] = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (2