Newer
Older
continue;
}
Santiago de Luxán Hernández
committed
if( ( m_intraModeHorVerRatio.at( bestNormalIntraModeIndex ) > 1.25 && tuIsDividedInRows ) || ( m_intraModeHorVerRatio.at( bestNormalIntraModeIndex ) < 0.8 && !tuIsDividedInRows ) )
Santiago de Luxán Hernández
committed
continue;

Karsten Suehring
committed
// set context models
m_CABACEstimator->getCtx() = ctxStart;
// determine residual for partition
cs.initSubStructure( *csTemp, partitioner.chType, cs.area, true );
bool tmpValidReturn = false;
if( cu.ispMode )
{
tmpValidReturn = xRecurIntraCodingLumaQT( *csTemp, subTuPartitioner, bestCurrentCost, 0, intraSubPartitionsProcOrder, false,
mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst );
}
else
{
if( ! fastMip )
{
m_bestCostNonMip = MAX_DOUBLE;
}
tmpValidReturn = xRecurIntraCodingLumaQT( *csTemp, partitioner, uiBestPUMode.ispMod ? bestCurrentCost : MAX_DOUBLE, -1, TU_NO_ISP, uiBestPUMode.ispMod,
mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst );
}
if( cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] )
{
if( !sps.getUseLFNST() )
Santiago de Luxán Hernández
committed
{
if ( cu.ispMode == HOR_INTRA_SUBPARTITIONS )
{
ispHorAllZeroCbfs |= ( m_pcEncCfg->getUseFastISP() && csTemp->tus[0]->lheight() > 2 && csTemp->cost >= bestCurrentCost );
}
else
{
ispVerAllZeroCbfs |= ( m_pcEncCfg->getUseFastISP() && csTemp->tus[0]->lwidth() > 2 && csTemp->cost >= bestCurrentCost );
}
Santiago de Luxán Hernández
committed
}
csTemp->cost = MAX_DOUBLE;
tmpValidReturn = false;
validReturn |= tmpValidReturn;
if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode && mode >= 0 )
{
m_modeCostStore[ lfnstIdx ][ testMip ? rdModeIdxList[ mode ] : mode ] = tmpValidReturn ? csTemp->cost : ( MAX_DOUBLE / 2.0 ); //(MAX_DOUBLE / 2.0) ??

Karsten Suehring
committed
DTRACE( g_trace_ctx, D_INTRA_COST, "IntraCost T %f (%d) \n", csTemp->cost, uiOrgMode.modeId );

Karsten Suehring
committed
if( tmpValidReturn )

Karsten Suehring
committed
{
// check r-d cost
if( csTemp->cost < csBest->cost )
{
std::swap( csTemp, csBest );

Karsten Suehring
committed
uiBestPUMode = uiOrgMode;
bestBDPCMMode = cu.bdpcmMode;
if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode )
{
m_bestModeCostStore[ lfnstIdx ] = csBest->cost; //cs.cost;
}
if( csBest->cost < bestCurrentCost )
{
bestCurrentCost = csBest->cost;
}
if( !cu.ispMode && !cu.mtsFlag )
{
m_modeCtrl->setMtsFirstPassNoIspCost( csBest->cost );
}
}
if( !cu.ispMode && !cu.bdpcmMode && csBest->cost < bestCostNonBDPCM )
bestCostNonBDPCM = csBest->cost;
bestNormalIntraModeIndex = mode;

Karsten Suehring
committed
csTemp->releaseIntermediateData();
} // Mode loop
cu.ispMode = uiBestPUMode.ispMod;

Karsten Suehring
committed
if( validReturn )
{
cs.useSubStructure( *csBest, partitioner.chType, pu.singleChan( CHANNEL_TYPE_LUMA ), true, true, keepResi, keepResi );
}

Karsten Suehring
committed
csBest->releaseIntermediateData();
if( validReturn )
{
//=== update PU data ====
cu.mipFlag = uiBestPUMode.mipFlg;
pu.multiRefIdx = uiBestPUMode.mRefId;
pu.intraDir[ CHANNEL_TYPE_LUMA ] = uiBestPUMode.modeId;
cu.bdpcmMode = bestBDPCMMode;
}

Karsten Suehring
committed
}
//===== reset context models =====
m_CABACEstimator->getCtx() = ctxStart;
return validReturn;

Karsten Suehring
committed
}
void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner, const double maxCostAllowed )

Karsten Suehring
committed
{
const ChromaFormat format = cu.chromaFormat;
const uint32_t numberValidComponents = getNumberValidComponents(format);
CodingStructure &cs = *cu.cs;
const TempCtx ctxStart ( m_CtxCache, m_CABACEstimator->getCtx() );
cs.setDecomp( cs.area.Cb(), false );
double bestCostSoFar = maxCostAllowed;
bool lumaUsesISP = !CS::isDualITree( *cu.cs ) && cu.ispMode;
PartSplit ispType = lumaUsesISP ? CU::getISPType( cu, COMPONENT_Y ) : TU_NO_ISP;
CHECK( cu.ispMode && bestCostSoFar < 0, "bestCostSoFar must be positive!" );

Karsten Suehring
committed
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
auto &pu = *cu.firstPU;
{
uint32_t uiBestMode = 0;
Distortion uiBestDist = 0;
double dBestCost = MAX_DOUBLE;
//----- init mode list ----
{
uint32_t uiMinMode = 0;
uint32_t uiMaxMode = NUM_CHROMA_MODE;
//----- check chroma modes -----
uint32_t chromaCandModes[ NUM_CHROMA_MODE ];
PU::getIntraChromaCandModes( pu, chromaCandModes );
// create a temporary CS
CodingStructure &saveCS = *m_pSaveCS[0];
saveCS.pcv = cs.pcv;
saveCS.picture = cs.picture;
saveCS.area.repositionTo( cs.area );
saveCS.clearTUs();
if( !CS::isDualITree( cs ) && cu.ispMode )
{
saveCS.clearCUs();
saveCS.clearPUs();
}

Karsten Suehring
committed
if( CS::isDualITree( cs ) )
{
if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
{
partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
do
{
cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType ), partitioner.chType ).depth = partitioner.currTrDepth;
} while( partitioner.nextPart( cs ) );
partitioner.exitCurrSplit();
}
else
cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType ), partitioner.chType );
}
std::vector<TransformUnit*> orgTUs;
if( lumaUsesISP )
{
CodingUnit& auxCU = saveCS.addCU( cu, partitioner.chType );
auxCU.ispMode = cu.ispMode;
saveCS.sps = cu.cs->sps;
saveCS.addPU( *cu.firstPU, partitioner.chType );
}

Karsten Suehring
committed
// create a store for the TUs
for( const auto &ptu : cs.tus )
{
// for split TUs in HEVC, add the TUs without Chroma parts for correct setting of Cbfs
if( lumaUsesISP || pu.contains( *ptu, CHANNEL_TYPE_CHROMA ) )

Karsten Suehring
committed
{
saveCS.addTU( *ptu, partitioner.chType );
orgTUs.push_back( ptu );
}
}
if( lumaUsesISP )
{
saveCS.clearCUs();
}
// SATD pre-selecting.
int satdModeList[NUM_CHROMA_MODE];
int64_t satdSortedCost[NUM_CHROMA_MODE];
for (int i = 0; i < NUM_CHROMA_MODE; i++)
{
satdSortedCost[i] = 0; // for the mode not pre-select by SATD, do RDO by default, so set the initial value 0.
satdModeList[i] = 0;
}
bool modeIsEnable[NUM_INTRA_MODE + 1]; // use intra mode idx to check whether enable
for (int i = 0; i < NUM_INTRA_MODE + 1; i++)
{
modeIsEnable[i] = 1;
}
DistParam distParam;
const bool useHadamard = !cu.transQuantBypass;
pu.intraDir[1] = MDLM_L_IDX; // temporary assigned, just to indicate this is a MDLM mode. for luma down-sampling operation.
initIntraPatternChType(cu, pu.Cb());
initIntraPatternChType(cu, pu.Cr());
xGetLumaRecPixels(pu, pu.Cb());
for (int idx = uiMinMode; idx <= uiMaxMode - 1; idx++)
{
int mode = chromaCandModes[idx];
satdModeList[idx] = mode;
if (PU::isLMCMode(mode) && !PU::isLMCModeEnabled(pu, mode))
{
continue;
}
if ((mode == LM_CHROMA_IDX) || (mode == PLANAR_IDX) || (mode == DM_CHROMA_IDX)) // only pre-check regular modes and MDLM modes, not including DM ,Planar, and LM
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
{
continue;
}
pu.intraDir[1] = mode; // temporary assigned, for SATD checking.
int64_t sad = 0;
CodingStructure& cs = *(pu.cs);
CompArea areaCb = pu.Cb();
PelBuf orgCb = cs.getOrgBuf(areaCb);
PelBuf predCb = cs.getPredBuf(areaCb);
m_pcRdCost->setDistParam(distParam, orgCb, predCb, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cb, useHadamard);
distParam.applyWeight = false;
if (PU::isLMCMode(mode))
{
predIntraChromaLM(COMPONENT_Cb, predCb, pu, areaCb, mode);
}
else
{
Alexey Filippov
committed
initPredIntraParams(pu, pu.Cb(), *pu.cs->sps);
predIntraAng(COMPONENT_Cb, predCb, pu);
}
sad += distParam.distFunc(distParam);

Karsten Suehring
committed
CompArea areaCr = pu.Cr();
PelBuf orgCr = cs.getOrgBuf(areaCr);
PelBuf predCr = cs.getPredBuf(areaCr);
m_pcRdCost->setDistParam(distParam, orgCr, predCr, pu.cs->sps->getBitDepth(CHANNEL_TYPE_CHROMA), COMPONENT_Cr, useHadamard);
distParam.applyWeight = false;
if (PU::isLMCMode(mode))
{
predIntraChromaLM(COMPONENT_Cr, predCr, pu, areaCr, mode);
}
else
{
Alexey Filippov
committed
initPredIntraParams(pu, pu.Cr(), *pu.cs->sps);
predIntraAng(COMPONENT_Cr, predCr, pu);
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
}
sad += distParam.distFunc(distParam);
satdSortedCost[idx] = sad;
}
// sort the mode based on the cost from small to large.
int tempIdx = 0;
int64_t tempCost = 0;
for (int i = uiMinMode; i <= uiMaxMode - 1; i++)
{
for (int j = i + 1; j <= uiMaxMode - 1; j++)
{
if (satdSortedCost[j] < satdSortedCost[i])
{
tempIdx = satdModeList[i];
satdModeList[i] = satdModeList[j];
satdModeList[j] = tempIdx;
tempCost = satdSortedCost[i];
satdSortedCost[i] = satdSortedCost[j];
satdSortedCost[j] = tempCost;
}
}
}
int reducedModeNumber = 2; // reduce the number of chroma modes
for (int i = 0; i < reducedModeNumber; i++)
{
modeIsEnable[satdModeList[uiMaxMode - 1 - i]] = 0; // disable the last reducedModeNumber modes
}

Karsten Suehring
committed
// save the dist
Distortion baseDist = cs.dist;
for (uint32_t uiMode = uiMinMode; uiMode < uiMaxMode; uiMode++)
{
const int chromaIntraMode = chromaCandModes[uiMode];
if( PU::isLMCMode( chromaIntraMode ) && ! PU::isLMCModeEnabled( pu, chromaIntraMode ) )
{
continue;
}
if (!modeIsEnable[chromaIntraMode] && PU::isLMCModeEnabled(pu, chromaIntraMode)) // when CCLM is disable, then MDLM is disable. not use satd checking
{
continue;
}

Karsten Suehring
committed
cs.setDecomp( pu.Cb(), false );
cs.dist = baseDist;
//----- restore context models -----
m_CABACEstimator->getCtx() = ctxStart;
//----- chroma coding -----
pu.intraDir[1] = chromaIntraMode;
xRecurIntraChromaCodingQT( cs, partitioner, bestCostSoFar, ispType );
if( lumaUsesISP && cs.dist == MAX_UINT )
{
continue;
}

Karsten Suehring
committed
#if JVET_O1136_TS_BDPCM_SIGNALLING
if (cs.sps->getTransformSkipEnabledFlag())
#else

Karsten Suehring
committed
if (cs.pps->getUseTransformSkip())
#endif

Karsten Suehring
committed
{
m_CABACEstimator->getCtx() = ctxStart;
}
uint64_t fracBits = xGetIntraFracBitsQT( cs, partitioner, false, true, -1, ispType );

Karsten Suehring
committed
Distortion uiDist = cs.dist;
double dCost = m_pcRdCost->calcRdCost( fracBits, uiDist - baseDist );
//----- compare -----
if( dCost < dBestCost )
{
if( lumaUsesISP && dCost < bestCostSoFar )
{
bestCostSoFar = dCost;
}

Karsten Suehring
committed
for( uint32_t i = getFirstComponentOfChannel( CHANNEL_TYPE_CHROMA ); i < numberValidComponents; i++ )
{
const CompArea &area = pu.blocks[i];
saveCS.getRecoBuf ( area ).copyFrom( cs.getRecoBuf ( area ) );
#if KEEP_PRED_AND_RESI_SIGNALS
saveCS.getPredBuf ( area ).copyFrom( cs.getPredBuf ( area ) );
saveCS.getResiBuf ( area ).copyFrom( cs.getResiBuf ( area ) );
#endif
saveCS.getPredBuf ( area ).copyFrom( cs.getPredBuf (area ) );
cs.picture->getPredBuf( area ).copyFrom( cs.getPredBuf (area ) );

Karsten Suehring
committed
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf( area ) );
for( uint32_t j = 0; j < saveCS.tus.size(); j++ )
{
saveCS.tus[j]->copyComponentFrom( *orgTUs[j], area.compID );
}
}
dBestCost = dCost;
uiBestDist = uiDist;
uiBestMode = chromaIntraMode;
}
}
for( uint32_t i = getFirstComponentOfChannel( CHANNEL_TYPE_CHROMA ); i < numberValidComponents; i++ )
{
const CompArea &area = pu.blocks[i];
cs.getRecoBuf ( area ).copyFrom( saveCS.getRecoBuf( area ) );
#if KEEP_PRED_AND_RESI_SIGNALS
cs.getPredBuf ( area ).copyFrom( saveCS.getPredBuf( area ) );
cs.getResiBuf ( area ).copyFrom( saveCS.getResiBuf( area ) );
#endif
cs.getPredBuf ( area ).copyFrom( saveCS.getPredBuf( area ) );
cs.picture->getPredBuf( area ).copyFrom( cs.getPredBuf ( area ) );

Karsten Suehring
committed
cs.picture->getRecoBuf( area ).copyFrom( cs. getRecoBuf( area ) );
for( uint32_t j = 0; j < saveCS.tus.size(); j++ )
{
orgTUs[ j ]->copyComponentFrom( *saveCS.tus[ j ], area.compID );
}
}
}
pu.intraDir[1] = uiBestMode;
cs.dist = uiBestDist;
}
//----- restore context models -----
m_CABACEstimator->getCtx() = ctxStart;
if( lumaUsesISP && bestCostSoFar >= maxCostAllowed )
{
cu.ispMode = 0;
}

Karsten Suehring
committed
}
void IntraSearch::IPCMSearch(CodingStructure &cs, Partitioner& partitioner)
{
ComponentID compStr = (CS::isDualITree(cs) && !isLuma(partitioner.chType)) ? COMPONENT_Cb: COMPONENT_Y;
ComponentID compEnd = (CS::isDualITree(cs) && isLuma(partitioner.chType)) ? COMPONENT_Y : COMPONENT_Cr;
for( ComponentID compID = compStr; compID <= compEnd; compID = ComponentID(compID+1) )

Karsten Suehring
committed
{
xEncPCM(cs, partitioner, compID);
}
cs.getPredBuf().fill(0);
cs.getResiBuf().fill(0);
cs.getOrgResiBuf().fill(0);
cs.dist = 0;
cs.fracBits = 0;
cs.cost = 0;
cs.setDecomp(cs.area);
cs.picture->getPredBuf(cs.area).copyFrom(cs.getPredBuf());

Karsten Suehring
committed
}
void IntraSearch::xEncPCM(CodingStructure &cs, Partitioner& partitioner, const ComponentID &compID)
{
TransformUnit &tu = *cs.getTU( partitioner.chType );
const int channelBitDepth = cs.sps->getBitDepth(toChannelType(compID));
const uint32_t uiPCMBitDepth = cs.sps->getPCMBitDepth(toChannelType(compID));
const int pcmShiftRight = (channelBitDepth - int(uiPCMBitDepth));
CompArea area = tu.blocks[compID];
PelBuf pcmBuf = tu.getPcmbuf (compID);
PelBuf recBuf = cs.getRecoBuf ( area );
CPelBuf orgBuf = cs.getOrgBuf ( area );
CHECK(pcmShiftRight < 0, "Negative shift");
CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
PelBuf tempOrgBuf = m_tmpStorageLCU.getBuf(tmpArea);
tempOrgBuf.copyFrom(orgBuf);
if (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y)

Karsten Suehring
committed
for (uint32_t uiY = 0; uiY < pcmBuf.height; uiY++)
{
for (uint32_t uiX = 0; uiX < pcmBuf.width; uiX++)
{
// Encode

Karsten Suehring
committed
// Reconstruction
recBuf.at(uiX, uiY) = pcmBuf.at(uiX, uiY) << pcmShiftRight;
}
}
}
Yung-Hsuan Chao (Jessie)
committed
#if JVET_O0119_BASE_PALETTE_444
void IntraSearch::PLTSearch(CodingStructure &cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
Yung-Hsuan Chao (Jessie)
committed
{
CodingUnit &cu = *cs.getCU(partitioner.chType);
TransformUnit &tu = *cs.getTU(partitioner.chType);
uint32_t height = cu.block(compBegin).height;
uint32_t width = cu.block(compBegin).width;
m_orgCtxRD = PLTCtx(m_CABACEstimator->getCtx());
if (m_pcEncCfg->getReshaper() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
{
cs.getPredBuf().copyFrom(cs.getOrgBuf());
cs.getPredBuf().Y().rspSignal(m_pcReshape->getFwdLUT());
}
Pel *runLength = tu.getRunLens (compBegin);
bool *runType = tu.getRunTypes(compBegin);
cu.lastPLTSize[compBegin] = cs.prevPLT.curPLTSize[compBegin];
//derive palette
derivePLTLossy(cs, partitioner, compBegin, numComp);
reorderPLT(cs, partitioner, compBegin, numComp);
//calculate palette index
preCalcPLTIndex(cs, partitioner, compBegin, numComp);
//derive run
uint64_t bits = MAX_UINT;
deriveRunAndCalcBits(cs, partitioner, compBegin, numComp, PLT_SCAN_HORTRAV, bits);
if ((cu.curPLTSize[compBegin] + cu.useEscape[compBegin]) > 1)
{
deriveRunAndCalcBits(cs, partitioner, compBegin, numComp, PLT_SCAN_VERTRAV, bits);
}
cu.useRotation[compBegin] = m_bBestScanRotationMode;
memcpy(runType, m_runTypeRD, sizeof(bool)*width*height);
memcpy(runLength, m_runLengthRD, sizeof(Pel)*width*height);
//reconstruct pixel
PelBuf curPLTIdx = tu.getcurPLTIdx(compBegin);
for (uint32_t y = 0; y < height; y++)
for (uint32_t x = 0; x < width; x++)
if (curPLTIdx.at(x, y) == cu.curPLTSize[compBegin])
{
}
else
{
for (uint32_t compID = compBegin; compID < (compBegin + numComp); compID++)
{
CompArea area = cu.blocks[compID];
PelBuf recBuf = cs.getRecoBuf(area);
uint32_t scaleX = getComponentScaleX((ComponentID)COMPONENT_Cb, cs.sps->getChromaFormatIdc());
uint32_t scaleY = getComponentScaleY((ComponentID)COMPONENT_Cb, cs.sps->getChromaFormatIdc());
if (compBegin != COMPONENT_Y || compID == COMPONENT_Y)
{
recBuf.at(x, y) = cu.curPLT[compID][curPLTIdx.at(x, y)];
else if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && y % (1 << scaleY) == 0 && x % (1 << scaleX) == 0)
recBuf.at(x >> scaleX, y >> scaleY) = cu.curPLT[compID][curPLTIdx.at(x, y)];
}
}
}
}
}
cs.getPredBuf().fill(0);
cs.getResiBuf().fill(0);
cs.getOrgResiBuf().fill(0);
cs.fracBits = MAX_UINT;
cs.cost = MAX_DOUBLE;
Distortion distortion = 0;
for (uint32_t comp = compBegin; comp < (compBegin + numComp); comp++)
{
const ComponentID compID = ComponentID(comp);
CPelBuf reco = cs.getRecoBuf(compID);
CPelBuf org = cs.getOrgBuf(compID);
Yung-Hsuan Chao (Jessie)
committed
#if WCG_EXT
if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (
m_pcEncCfg->getReshaper() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
{
const CPelBuf orgLuma = cs.getOrgBuf(cs.area.blocks[COMPONENT_Y]);
if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
{
const CompArea &areaY = cu.Y();
CompArea tmpArea1(COMPONENT_Y, areaY.chromaFormat, Position(0, 0), areaY.size());
PelBuf tmpRecLuma = m_tmpStorageLCU.getBuf(tmpArea1);
tmpRecLuma.copyFrom(reco);
tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
distortion += m_pcRdCost->getDistPart(org, tmpRecLuma, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
}
else
{
distortion += m_pcRdCost->getDistPart(org, reco, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
}
}
else
Yung-Hsuan Chao (Jessie)
committed
#endif
distortion += m_pcRdCost->getDistPart(org, reco, cs.sps->getBitDepth(toChannelType(compID)), compID, DF_SSE);
}
Yung-Hsuan Chao (Jessie)
committed
cs.dist += distortion;
const CompArea &area = cu.blocks[compBegin];
cs.setDecomp(area);
cs.picture->getRecoBuf(area).copyFrom(cs.getRecoBuf(area));
Yung-Hsuan Chao (Jessie)
committed
}
void IntraSearch::deriveRunAndCalcBits(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp, PLTScanMode pltScanMode, uint64_t& minBits)
Yung-Hsuan Chao (Jessie)
committed
{
CodingUnit &cu = *cs.getCU(partitioner.chType);
TransformUnit &tu = *cs.getTU(partitioner.chType);
uint32_t height = cu.block(compBegin).height;
uint32_t width = cu.block(compBegin).width;
Pel *runLength = tu.getRunLens (compBegin);
bool *runType = tu.getRunTypes(compBegin);
cu.useRotation[compBegin] = (pltScanMode == PLT_SCAN_VERTRAV);
m_puiScanOrder = g_scanOrder[SCAN_UNGROUPED][(cu.useRotation[compBegin]) ? SCAN_TRAV_VER : SCAN_TRAV_HOR][gp_sizeIdxInfo->idxFrom(width)][gp_sizeIdxInfo->idxFrom(height)];
deriveRun(cs, partitioner, compBegin);
m_CABACEstimator->getCtx() = PLTCtx(m_orgCtxRD);
m_CABACEstimator->resetBits();
CUCtx cuCtx;
cuCtx.isDQPCoded = true;
cuCtx.isChromaQpAdjCoded = true;
m_CABACEstimator->cu_palette_info(cu, compBegin, numComp, cuCtx);
uint64_t bitsTemp = m_CABACEstimator->getEstFracBits();
if (minBits > bitsTemp)
{
m_bBestScanRotationMode = pltScanMode;
memcpy(m_runTypeRD, runType, sizeof(bool)*width*height);
memcpy(m_runLengthRD, runLength, sizeof(Pel)*width*height);
minBits = bitsTemp;
Yung-Hsuan Chao (Jessie)
committed
}
void IntraSearch::deriveRun(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin)
Yung-Hsuan Chao (Jessie)
committed
{
CodingUnit &cu = *cs.getCU(partitioner.chType);
TransformUnit &tu = *cs.getTU(partitioner.chType);
uint32_t height = cu.block(compBegin).height;
uint32_t width = cu.block(compBegin).width;
uint32_t total = height * width, idx = 0;
uint32_t startPos = 0;
uint64_t indexBits = 0, runBitsIndex = 0, runBitsCopy = 0;
m_storeCtx_Run = PLTCtx(m_orgCtxRD);
PLTtypeBuf runType = tu.getrunType(compBegin);
PelBuf runLength = tu.getrunLength(compBegin);
startPos = idx;
double aveBitsPerPix[NUM_PLT_RUN];
uint32_t indexRun = 0;
bool runValid = calIndexRun(cs, partitioner, startPos, total, indexRun, compBegin);
m_CABACEstimator->getCtx() = PLTCtx(m_storeCtx_Run);
aveBitsPerPix[PLT_RUN_INDEX] = runValid ? getRunBits(cu, indexRun, startPos, PLT_RUN_INDEX, &indexBits, &runBitsIndex, compBegin) : MAX_DOUBLE;
m_storeCtx_RunIndex = PLTCtx(m_CABACEstimator->getCtx());
uint32_t copyRun = 0;
bool copyValid = calCopyRun(cs, partitioner, startPos, total, copyRun, compBegin);
m_CABACEstimator->getCtx() = PLTCtx(m_storeCtx_Run);
aveBitsPerPix[PLT_RUN_COPY] = copyValid ? getRunBits(cu, copyRun, startPos, PLT_RUN_COPY, &indexBits, &runBitsCopy, compBegin) : MAX_DOUBLE;
m_storeCtx_RunCopy = PLTCtx(m_CABACEstimator->getCtx());
if (copyValid == 0 && runValid == 0)
{
assert(0);
}
else
{
if (aveBitsPerPix[PLT_RUN_COPY] <= aveBitsPerPix[PLT_RUN_INDEX])
for (int runidx = 0; runidx <copyRun; runidx++)
uint32_t posy = m_puiScanOrder[idx + runidx].y;
uint32_t posx = m_puiScanOrder[idx + runidx].x;
runType.at(posx, posy) = PLT_RUN_COPY;
runLength.at(posx, posy) = copyRun;
m_storeCtx_Run = PLTCtx(m_storeCtx_RunCopy);
}
else
{
for (int runidx = 0; runidx <indexRun; runidx++)
uint32_t posy = m_puiScanOrder[idx + runidx].y;
uint32_t posx = m_puiScanOrder[idx + runidx].x;
runType.at(posx, posy) = PLT_RUN_INDEX;
runLength.at(posx, posy) = indexRun;
m_storeCtx_Run = PLTCtx(m_storeCtx_RunIndex);
}
}
}
Yung-Hsuan Chao (Jessie)
committed
}
double IntraSearch::getRunBits(const CodingUnit& cu, uint32_t run, uint32_t strPos, PLTRunMode paletteRunMode, uint64_t* indexBits, uint64_t* runBits, ComponentID compBegin)
Yung-Hsuan Chao (Jessie)
committed
{
TransformUnit& tu = *cu.firstTU;
uint32_t height = cu.block(compBegin).height;
uint32_t width = cu.block(compBegin).width;
uint32_t endPos = height*width;
PLTtypeBuf runType = tu.getrunType(compBegin);
PelBuf curPLTIdx = tu.getcurPLTIdx(compBegin);
uint32_t indexMaxSize = (cu.useEscape[compBegin]) ? (cu.curPLTSize[compBegin] + 1) : cu.curPLTSize[compBegin];
m_CABACEstimator->resetBits();
///////////////// encode Run Type
m_CABACEstimator->encodeRunType(cu, runType, strPos, m_puiScanOrder, compBegin);
uint64_t runTypeBits = m_CABACEstimator->getEstFracBits();
uint32_t curLevel = 0;
switch (paletteRunMode)
{
case PLT_RUN_INDEX:
curLevel = m_CABACEstimator->writePLTIndex(cu, strPos, curPLTIdx, runType, indexMaxSize, compBegin);
*indexBits = m_CABACEstimator->getEstFracBits() - runTypeBits;
m_CABACEstimator->cu_run_val(run - 1, PLT_RUN_INDEX, curLevel, endPos - strPos - 1);
*runBits = m_CABACEstimator->getEstFracBits() - runTypeBits - (*indexBits);
break;
case PLT_RUN_COPY:
m_CABACEstimator->cu_run_val(run - 1, PLT_RUN_COPY, curLevel, endPos - strPos - 1);
*runBits = m_CABACEstimator->getEstFracBits() - runTypeBits;
break;
default:
assert(0);
}
assert(run >= 1);
double costPerPixel = (double)m_CABACEstimator->getEstFracBits() / (double)run;
return costPerPixel;
Yung-Hsuan Chao (Jessie)
committed
}
void IntraSearch::preCalcPLTIndex(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
Yung-Hsuan Chao (Jessie)
committed
{
CodingUnit &cu = *cs.getCU(partitioner.chType);
TransformUnit &tu = *cs.getTU(partitioner.chType);
const int channelBitDepth_L = cs.sps->getBitDepth(CHANNEL_TYPE_LUMA);
const int channelBitDepth_C = cs.sps->getBitDepth(CHANNEL_TYPE_CHROMA);
const int pcmShiftRight_L = (channelBitDepth_L - PLT_ENCBITDEPTH);
const int pcmShiftRight_C = (channelBitDepth_C - PLT_ENCBITDEPTH);
uint32_t height = cu.block(compBegin).height;
uint32_t width = cu.block(compBegin).width;
CPelBuf orgBuf[3];
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
CompArea area = cu.blocks[comp];
if (m_pcEncCfg->getReshaper() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
{
orgBuf[comp] = cs.getPredBuf(area);
}
else
{
orgBuf[comp] = cs.getOrgBuf(area);
}
}
PelBuf curPLTIdx = tu.getcurPLTIdx(compBegin);
int errorLimit = numComp * g_uhPLTQuant[cu.qp];
uint32_t bestIdx = 0;
uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
for (uint32_t y = 0; y < height; y++)
for (uint32_t x = 0; x < width; x++)
uint32_t pltIdx = 0;
uint32_t minError = MAX_UINT;
while (pltIdx < cu.curPLTSize[compBegin])
uint32_t absError = 0, pX, pY;
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
pX = (comp > 0 && compBegin == COMPONENT_Y) ? (x >> scaleX) : x;
pY = (comp > 0 && compBegin == COMPONENT_Y) ? (y >> scaleY) : y;
int shift = (comp > 0) ? pcmShiftRight_C : pcmShiftRight_L;
absError += abs(cu.curPLT[comp][pltIdx] - orgBuf[comp].at(pX, pY)) >> shift;
bestIdx = pltIdx;
minError = absError;
if (minError == 0)
{
break;
}
}
curPLTIdx.at(x, y) = bestIdx;
if (minError > errorLimit)
curPLTIdx.at(x, y) = cu.curPLTSize[compBegin];
cu.useEscape[compBegin] = true;
calcPixelPred(cs, partitioner, y, x, compBegin, numComp);
}
}
}
Yung-Hsuan Chao (Jessie)
committed
}
void IntraSearch::calcPixelPred(CodingStructure& cs, Partitioner& partitioner, uint32_t yPos, uint32_t xPos, ComponentID compBegin, uint32_t numComp)
Yung-Hsuan Chao (Jessie)
committed
{
CodingUnit &cu = *cs.getCU(partitioner.chType);
TransformUnit &tu = *cs.getTU(partitioner.chType);
CPelBuf orgBuf[3];
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
CompArea area = cu.blocks[comp];
if (m_pcEncCfg->getReshaper() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
{
orgBuf[comp] = cs.getPredBuf(area);
}
else
{
orgBuf[comp] = cs.getOrgBuf(area);
}
}
int qp[3];
int qpRem[3];
int qpPer[3];
int quantiserScale[3];
int quantiserRightShift[3];
int rightShiftOffset[3];
int InvquantiserRightShift[3];
for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++)
{
QpParam cQP(tu, ComponentID(ch));
qp[ch] = cQP.Qp;
qpRem[ch] = qp[ch] % 6;
qpPer[ch] = qp[ch] / 6;
quantiserScale[ch] = g_quantScales[0][qpRem[ch]];
quantiserRightShift[ch] = QUANT_SHIFT + qpPer[ch];
rightShiftOffset[ch] = 1 << (quantiserRightShift[ch] - 1);
InvquantiserRightShift[ch] = IQUANT_SHIFT;
add[ch] = 1 << (InvquantiserRightShift[ch] - 1);
}
uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
for (uint32_t ch = compBegin; ch < (compBegin + numComp); ch++)
{
const int channelBitDepth = cu.cs->sps->getBitDepth(toChannelType((ComponentID)ch));
CompArea area = cu.blocks[ch];
PelBuf recBuf = cs.getRecoBuf(area);
PLTescapeBuf escapeValue = tu.getescapeValue((ComponentID)ch);
if (compBegin != COMPONENT_Y || ch == 0)
{
escapeValue.at(xPos, yPos) = TCoeff(std::max<int>(0, ((orgBuf[ch].at(xPos, yPos) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch])));
assert(escapeValue.at(xPos, yPos) < (1 << (channelBitDepth + 1)));
recBuf.at(xPos, yPos) = (((escapeValue.at(xPos, yPos)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> InvquantiserRightShift[ch];
recBuf.at(xPos, yPos) = Pel(ClipBD<int>(recBuf.at(xPos, yPos), channelBitDepth));//to be checked
else if (compBegin == COMPONENT_Y && ch > 0 && yPos % (1 << scaleY) == 0 && xPos % (1 << scaleX) == 0)
uint32_t yPosC = yPos >> scaleY;
uint32_t xPosC = xPos >> scaleX;
escapeValue.at(xPosC, yPosC) = TCoeff(std::max<int>(0, ((orgBuf[ch].at(xPosC, yPosC) * quantiserScale[ch] + rightShiftOffset[ch]) >> quantiserRightShift[ch])));
assert(escapeValue.at(xPosC, yPosC) < (1 << (channelBitDepth + 1)));
recBuf.at(xPosC, yPosC) = (((escapeValue.at(xPosC, yPosC)*g_invQuantScales[0][qpRem[ch]]) << qpPer[ch]) + add[ch]) >> InvquantiserRightShift[ch];
recBuf.at(xPosC, yPosC) = Pel(ClipBD<int>(recBuf.at(xPosC, yPosC), channelBitDepth));//to be checked
}
}
Yung-Hsuan Chao (Jessie)
committed
}
void IntraSearch::derivePLTLossy(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp)
Yung-Hsuan Chao (Jessie)
committed
{
CodingUnit &cu = *cs.getCU(partitioner.chType);
const int channelBitDepth_L = cs.sps->getBitDepth(CHANNEL_TYPE_LUMA);
const int channelBitDepth_C = cs.sps->getBitDepth(CHANNEL_TYPE_CHROMA);
const int pcmShiftRight_L = (channelBitDepth_L - PLT_ENCBITDEPTH);
const int pcmShiftRight_C = (channelBitDepth_C - PLT_ENCBITDEPTH);
uint32_t height = cu.block(compBegin).height;
uint32_t width = cu.block(compBegin).width;
CPelBuf orgBuf[3];
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
CompArea area = cu.blocks[comp];
if (m_pcEncCfg->getReshaper() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
{
orgBuf[comp] = cs.getPredBuf(area);
}
else
{
orgBuf[comp] = cs.getOrgBuf(area);
}
}
int errorLimit = g_uhPLTQuant[cu.qp];
uint32_t totalSize = height*width;
SortingElement *pelList = new SortingElement[totalSize];
SortingElement element;
SortingElement *pelListSort = new SortingElement[MAXPLTSIZE + 1];
uint32_t dictMaxSize = MAXPLTSIZE;
uint32_t idx = 0;
int last = -1;
uint32_t scaleX = getComponentScaleX(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
uint32_t scaleY = getComponentScaleY(COMPONENT_Cb, cs.sps->getChromaFormatIdc());
for (uint32_t y = 0; y < height; y++)
for (uint32_t x = 0; x < width; x++)
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
pX = (comp > 0 && compBegin == COMPONENT_Y) ? (x >> scaleX) : x;
pY = (comp > 0 && compBegin == COMPONENT_Y) ? (y >> scaleY) : y;
org[comp] = orgBuf[comp].at(pX, pY);
element.setAll(org, compBegin, numComp);
int besti = last, bestSAD = (last == -1) ? MAX_UINT : pelList[last].getSAD(element, cs.sps->getBitDepths(), compBegin, numComp);
if (bestSAD)
{
for (int i = idx - 1; i >= 0; i--)
uint32_t sad = pelList[i].getSAD(element, cs.sps->getBitDepths(), compBegin, numComp);
if (sad < bestSAD)
{
bestSAD = sad;
besti = i;
if (!sad) break;
}
}
}
if (besti >= 0 && pelList[besti].almostEqualData(element, errorLimit, cs.sps->getBitDepths(), compBegin, numComp))
pelList[besti].addElement(element, compBegin, numComp);
last = besti;
}
else
{
pelList[idx].copyDataFrom(element, compBegin, numComp);
pelList[idx].cnt = 1;
last = idx;
idx++;
}
}
}
for (int i = 0; i < dictMaxSize; i++)
pelListSort[i].cnt = 0;
pelListSort[i].resetAll(compBegin, numComp);
}
//bubble sorting
dictMaxSize = 1;
for (int i = 0; i < idx; i++)
if (pelList[i].cnt > pelListSort[dictMaxSize - 1].cnt)
{
int j;
for (j = dictMaxSize; j > 0; j--)
if (pelList[i].cnt > pelListSort[j - 1].cnt)
pelListSort[j].copyAllFrom(pelListSort[j - 1], compBegin, numComp);
dictMaxSize = std::min(dictMaxSize + 1, (uint32_t)MAXPLTSIZE);
}
else
{
break;
}
}
pelListSort[j].copyAllFrom(pelList[i], compBegin, numComp);
}
}
uint64_t numColorBits = 0;
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
numColorBits += (comp > 0) ? channelBitDepth_C : channelBitDepth_L;
}
double bitCost = m_pcRdCost->getLambda()*numColorBits;
for (int i = 0; i < MAXPLTSIZE; i++)
{
int half = pelListSort[i].cnt >> 1;
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
cu.curPLT[comp][paletteSize] = (pelListSort[i].sumData[comp] + half) / pelListSort[i].cnt;
}
int best = -1;
{
double pal[MAX_NUM_COMPONENT], err = 0.0, bestCost = 0.0;
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
const int shift = (comp > 0) ? pcmShiftRight_C : pcmShiftRight_L;
pal[comp] = pelListSort[i].sumData[comp] / (double)pelListSort[i].cnt;
err = pal[comp] - cu.curPLT[comp][paletteSize];
bestCost += (err*err) / (1 << (2 * shift));
}
bestCost = bestCost * pelListSort[i].cnt + bitCost;
for (int t = 0; t < cs.prevPLT.curPLTSize[compBegin]; t++)
{
double cost = 0.0;
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
const int shift = (comp > 0) ? pcmShiftRight_C : pcmShiftRight_L;
err = pal[comp] - cs.prevPLT.curPLT[comp][t];
cost += (err*err) / (1 << (2 * shift));
}
cost *= pelListSort[i].cnt;
if (cost < bestCost)
{
best = t;
bestCost = cost;
}
}
if (best != -1)
{
for (int comp = compBegin; comp < (compBegin + numComp); comp++)
{
cu.curPLT[comp][paletteSize] = cs.prevPLT.curPLT[comp][best];
}
}
}
bool duplicate = false;
if (pelListSort[i].cnt == 1 && best == -1)