[x265] refine partition size related

Satoshi Nakagawa nakagawa424 at oki.com
Tue Jul 22 08:57:52 CEST 2014


# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1406011990 -32400
#      Tue Jul 22 15:53:10 2014 +0900
# Node ID b2ad081e4bfc20bbc84e8bfbab59ed52aeac2a73
# Parent  d303b4d860e9f06396a156726dd518d0f41fe796
refine partition-size-related code

- reorder LumaPartitions to simplify partitionFromLog2Size()
- remove unused functions and lookup tables


diff -r d303b4d860e9 -r b2ad081e4bfc source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp	Mon Jul 21 22:43:38 2014 -0500
+++ b/source/Lib/TLibCommon/TComYuv.cpp	Tue Jul 22 15:53:10 2014 +0900
@@ -127,6 +127,15 @@
     primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], getCStride(), srcV, srcPicYuv->getCStride());
 }
 
+void TComYuv::copyFromYuv(TComYuv* srcYuv)
+{
+    X265_CHECK(m_width <= srcYuv->m_width && m_height <= srcYuv->m_height, "invalid size\n");
+
+    primitives.luma_copy_pp[m_part](m_buf[0], m_width, srcYuv->m_buf[0], srcYuv->m_width);
+    primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_cwidth, srcYuv->m_buf[1], srcYuv->m_cwidth);
+    primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_cwidth, srcYuv->m_buf[2], srcYuv->m_cwidth);
+}
+
 void TComYuv::copyToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx)
 {
     pixel* dstY = dstPicYuv->getLumaAddr(partIdx);
@@ -156,50 +165,9 @@
     primitives.chroma[m_csp].copy_pp[part](dstV, dstPicYuv->getCStride(), srcV, getCStride());
 }
 
-void TComYuv::copyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
+void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t log2Size)
 {
-    int part = partitionFromSizes(width, height);
-
-    X265_CHECK(width != 4 || height != 4, "4x4 partition detected\n");
-
-    if (bLuma)
-    {
-        pixel* src = getLumaAddr(partIdx);
-        pixel* dst = dstPicYuv->getLumaAddr(partIdx);
-
-        uint32_t srcstride = getStride();
-        uint32_t dststride = dstPicYuv->getStride();
-
-        primitives.luma_copy_pp[part](dst, dststride, src, srcstride);
-    }
-    if (bChroma)
-    {
-        pixel* srcU = getCbAddr(partIdx);
-        pixel* srcV = getCrAddr(partIdx);
-        pixel* dstU = dstPicYuv->getCbAddr(partIdx);
-        pixel* dstV = dstPicYuv->getCrAddr(partIdx);
-
-        uint32_t srcstride = getCStride();
-        uint32_t dststride = dstPicYuv->getCStride();
-
-        primitives.chroma[m_csp].copy_pp[part](dstU, dststride, srcU, srcstride);
-        primitives.chroma[m_csp].copy_pp[part](dstV, dststride, srcV, srcstride);
-    }
-}
-
-void TComYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize)
-{
-    int part = partitionFromSize(lumaSize);
-
-    int16_t* dst = dstPicYuv->getLumaAddr(partIdx);
-    uint32_t dststride = dstPicYuv->m_width;
-
-    primitives.luma_copy_ps[part](dst, dststride, getLumaAddr(partIdx), getStride());
-}
-
-void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize)
-{
-    int part = partitionFromSize(partSize);
+    int part = partitionFromLog2Size(log2Size);
 
     addClipLuma(srcYuv0, srcYuv1, part);
     addClipChroma(srcYuv0, srcYuv1, part);
@@ -235,113 +203,32 @@
     primitives.chroma[m_csp].add_ps[part](dstV, dststride, srcV0, srcV1, src0Stride, src1Stride);
 }
 
-void TComYuv::addAvg(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
-{
-    int x, y;
-    uint32_t src0Stride, src1Stride, dststride;
-    int shiftNum, offset;
-
-    pixel* srcY0 = srcYuv0->getLumaAddr(partUnitIdx);
-    pixel* srcU0 = srcYuv0->getCbAddr(partUnitIdx);
-    pixel* srcV0 = srcYuv0->getCrAddr(partUnitIdx);
-
-    pixel* srcY1 = srcYuv1->getLumaAddr(partUnitIdx);
-    pixel* srcU1 = srcYuv1->getCbAddr(partUnitIdx);
-    pixel* srcV1 = srcYuv1->getCrAddr(partUnitIdx);
-
-    pixel* dstY  = getLumaAddr(partUnitIdx);
-    pixel* dstU  = getCbAddr(partUnitIdx);
-    pixel* dstV  = getCrAddr(partUnitIdx);
-
-    if (bLuma)
-    {
-        src0Stride = srcYuv0->getStride();
-        src1Stride = srcYuv1->getStride();
-        dststride  = getStride();
-        shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
-        offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
-
-        for (y = 0; y < height; y++)
-        {
-            for (x = 0; x < width; x += 4)
-            {
-                dstY[x + 0] = Clip((srcY0[x + 0] + srcY1[x + 0] + offset) >> shiftNum);
-                dstY[x + 1] = Clip((srcY0[x + 1] + srcY1[x + 1] + offset) >> shiftNum);
-                dstY[x + 2] = Clip((srcY0[x + 2] + srcY1[x + 2] + offset) >> shiftNum);
-                dstY[x + 3] = Clip((srcY0[x + 3] + srcY1[x + 3] + offset) >> shiftNum);
-            }
-
-            srcY0 += src0Stride;
-            srcY1 += src1Stride;
-            dstY  += dststride;
-        }
-    }
-    if (bChroma)
-    {
-        shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
-        offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
-
-        src0Stride = srcYuv0->getCStride();
-        src1Stride = srcYuv1->getCStride();
-        dststride  = getCStride();
-
-        width  >>= m_hChromaShift;
-        height >>= m_vChromaShift;
-
-        for (y = height - 1; y >= 0; y--)
-        {
-            for (x = width - 1; x >= 0; )
-            {
-                // note: chroma min width is 2
-                dstU[x] = Clip((srcU0[x] + srcU1[x] + offset) >> shiftNum);
-                dstV[x] = Clip((srcV0[x] + srcV1[x] + offset) >> shiftNum);
-                x--;
-                dstU[x] = Clip((srcU0[x] + srcU1[x] + offset) >> shiftNum);
-                dstV[x] = Clip((srcV0[x] + srcV1[x] + offset) >> shiftNum);
-                x--;
-            }
-
-            srcU0 += src0Stride;
-            srcU1 += src1Stride;
-            srcV0 += src0Stride;
-            srcV1 += src1Stride;
-            dstU  += dststride;
-            dstV  += dststride;
-        }
-    }
-}
-
 void TComYuv::addAvg(ShortYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
 {
-    uint32_t src0Stride, src1Stride, dststride;
-
-    int16_t* srcY0 = srcYuv0->getLumaAddr(partUnitIdx);
-    int16_t* srcU0 = srcYuv0->getCbAddr(partUnitIdx);
-    int16_t* srcV0 = srcYuv0->getCrAddr(partUnitIdx);
-
-    int16_t* srcY1 = srcYuv1->getLumaAddr(partUnitIdx);
-    int16_t* srcU1 = srcYuv1->getCbAddr(partUnitIdx);
-    int16_t* srcV1 = srcYuv1->getCrAddr(partUnitIdx);
-
-    pixel* dstY = getLumaAddr(partUnitIdx);
-    pixel* dstU = getCbAddr(partUnitIdx);
-    pixel* dstV = getCrAddr(partUnitIdx);
-
     int part = partitionFromSizes(width, height);
 
     if (bLuma)
     {
-        src0Stride = srcYuv0->m_width;
-        src1Stride = srcYuv1->m_width;
-        dststride  = getStride();
+        int16_t* srcY0 = srcYuv0->getLumaAddr(partUnitIdx);
+        int16_t* srcY1 = srcYuv1->getLumaAddr(partUnitIdx);
+        pixel* dstY = getLumaAddr(partUnitIdx);
+        uint32_t src0Stride = srcYuv0->m_width;
+        uint32_t src1Stride = srcYuv1->m_width;
+        uint32_t dststride  = getStride();
 
         primitives.luma_addAvg[part](srcY0, srcY1, dstY, src0Stride, src1Stride, dststride);
     }
     if (bChroma)
     {
-        src0Stride = srcYuv0->m_cwidth;
-        src1Stride = srcYuv1->m_cwidth;
-        dststride  = getCStride();
+        int16_t* srcU0 = srcYuv0->getCbAddr(partUnitIdx);
+        int16_t* srcV0 = srcYuv0->getCrAddr(partUnitIdx);
+        int16_t* srcU1 = srcYuv1->getCbAddr(partUnitIdx);
+        int16_t* srcV1 = srcYuv1->getCrAddr(partUnitIdx);
+        pixel* dstU = getCbAddr(partUnitIdx);
+        pixel* dstV = getCrAddr(partUnitIdx);
+        uint32_t src0Stride = srcYuv0->m_cwidth;
+        uint32_t src1Stride = srcYuv1->m_cwidth;
+        uint32_t dststride  = getCStride();
 
         primitives.chroma[m_csp].addAvg[part](srcU0, srcU1, dstU, src0Stride, src1Stride, dststride);
         primitives.chroma[m_csp].addAvg[part](srcV0, srcV1, dstV, src0Stride, src1Stride, dststride);
diff -r d303b4d860e9 -r b2ad081e4bfc source/Lib/TLibCommon/TComYuv.h
--- a/source/Lib/TLibCommon/TComYuv.h	Mon Jul 21 22:43:38 2014 -0500
+++ b/source/Lib/TLibCommon/TComYuv.h	Tue Jul 22 15:53:10 2014 +0900
@@ -121,28 +121,25 @@
     //  Copy YUV buffer from picture buffer
     void    copyFromPicYuv(TComPicYuv* srcPicYuv, uint32_t cuAddr, uint32_t absZOrderIdx);
 
+    //  Copy from same size YUV buffer
+    void    copyFromYuv(TComYuv* srcYuv);
+
     //  Copy Small YUV buffer to the part of other Big YUV buffer
     void    copyToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx);
 
     //  Copy the part of Big YUV buffer to other Small YUV buffer
     void    copyPartToYuv(TComYuv* dstPicYuv, uint32_t srcPartIdx);
 
-    //  Copy YUV partition buffer to other YUV partition buffer
-    void    copyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma);
-
-    void    copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize);
-
     // ------------------------------------------------------------------------------------------------------------------
     //  Algebraic operation for YUV buffer
     // ------------------------------------------------------------------------------------------------------------------
 
     //  Clip(srcYuv0 + srcYuv1) -> m_apiBuf
-    void    addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize);
+    void    addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t log2Size);
     void    addClipLuma(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t part);
     void    addClipChroma(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t part);
 
     //  (srcYuv0 + srcYuv1)/2 for YUV partition
-    void    addAvg(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma);
     void    addAvg(ShortYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma);
 
     // ------------------------------------------------------------------------------------------------------------------
diff -r d303b4d860e9 -r b2ad081e4bfc source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Jul 21 22:43:38 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Tue Jul 22 15:53:10 2014 +0900
@@ -2297,7 +2297,7 @@
     {
         cu->setSkipFlagSubParts(true, 0, depth);
 
-        predYuv->copyToPartYuv(outReconYuv, 0);
+        outReconYuv->copyFromYuv(predYuv);
         // Luma
         int part = partitionFromLog2Size(log2CUSize);
         distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
@@ -2426,9 +2426,9 @@
         X265_CHECK(bestCost != MAX_INT64, "no best cost\n");
 
         if (cu->getQtRootCbf(0))
-            outReconYuv->addClip(predYuv, outBestResiYuv, cuSize);
+            outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize);
         else
-            predYuv->copyToPartYuv(outReconYuv, 0);
+            outReconYuv->copyFromYuv(predYuv);
 
         // update with clipped distortion and cost (qp estimation loop uses unclipped values)
         int part = partitionFromLog2Size(log2CUSize);
@@ -2457,7 +2457,7 @@
 {
     if (skipRes && cu->getPredictionMode(0) == MODE_INTER && cu->getMergeFlag(0) && cu->getPartitionSize(0) == SIZE_2Nx2N)
     {
-        predYuv->copyToPartYuv(reconYuv, 0);
+        reconYuv->copyFromYuv(predYuv);
         cu->clearCbf(0, cu->getDepth(0));
         return;
     }
@@ -2467,12 +2467,11 @@
     if (cu->getPredictionMode(0) == MODE_INTER)
     {
         residualTransformQuantInter(cu, 0, resiYuv, cu->getDepth(0), true);
-        uint32_t cuSize = 1 << cu->getLog2CUSize(0);
         if (cu->getQtRootCbf(0))
-            reconYuv->addClip(predYuv, resiYuv, cuSize);
+            reconYuv->addClip(predYuv, resiYuv, cu->getLog2CUSize(0));
         else
         {
-            predYuv->copyToPartYuv(reconYuv, 0);
+            reconYuv->copyFromYuv(predYuv);
             if (cu->getMergeFlag(0) && cu->getPartitionSize(0) == SIZE_2Nx2N)
                 cu->setSkipFlagSubParts(true, 0, cu->getDepth(0));
         }
diff -r d303b4d860e9 -r b2ad081e4bfc source/common/pixel.cpp
--- a/source/common/pixel.cpp	Mon Jul 21 22:43:38 2014 -0500
+++ b/source/common/pixel.cpp	Tue Jul 22 15:53:10 2014 +0900
@@ -1020,9 +1020,7 @@
     p.chroma[X265_CSP_I422].copy_pp[CHROMA422X_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
     p.chroma[X265_CSP_I422].copy_sp[CHROMA422X_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
     p.chroma[X265_CSP_I422].copy_ps[CHROMA422X_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
-    p.chroma[X265_CSP_I422].copy_ss[CHROMA422X_ ## W ## x ## H] = blockcopy_ss_c<W, H>; \
-    p.chroma[X265_CSP_I422].copy_sp[NUM_CHROMA_PARTITIONS422] = blockcopy_sp_c<W, (H >> 1)>; \
-    p.chroma[X265_CSP_I422].copy_ps[NUM_CHROMA_PARTITIONS422] = blockcopy_ps_c<W, (H >> 1)>;
+    p.chroma[X265_CSP_I422].copy_ss[CHROMA422X_ ## W ## x ## H] = blockcopy_ss_c<W, H>;
 
 #define CHROMA_444(W, H) \
     p.chroma[X265_CSP_I444].addAvg[LUMA_ ## W ## x ## H]  = addAvg<W, H>; \
diff -r d303b4d860e9 -r b2ad081e4bfc source/common/primitives.cpp
--- a/source/common/primitives.cpp	Mon Jul 21 22:43:38 2014 -0500
+++ b/source/common/primitives.cpp	Tue Jul 22 15:53:10 2014 +0900
@@ -49,16 +49,6 @@
     255,        255,      255,        LUMA_64x16, 255, 255,        255, LUMA_64x32, 255, 255, 255, LUMA_64x48, 255, 255, 255, LUMA_64x64  // 64
 };
 
-extern const uint8_t lumaSquarePartitionMapTable[] =
-{
-    LUMA_4x4,  LUMA_8x8,  255,        LUMA_16x16, 255, 255,        255, LUMA_32x32, 255, 255, 255, 255,        255, 255, 255, LUMA_64x64
-};
-
-extern const uint8_t lumaPartitionsFromSquareBlocksTable[] =
-{
-    LUMA_4x4, LUMA_8x8, LUMA_16x16, LUMA_32x32, LUMA_64x64
-};
-
 /* the "authoritative" set of encoder primitives */
 EncoderPrimitives primitives;
 
@@ -77,7 +67,7 @@
     Setup_C_LoopFilterPrimitives(p); // loopfilter.cpp
 }
 
-static void Setup_Alias_Primitives(EncoderPrimitives &p)
+void Setup_Alias_Primitives(EncoderPrimitives &p)
 {
     /* copy reusable luma primitives to chroma 4:4:4 */
     for (int i = 0; i < NUM_LUMA_PARTITIONS; i++)
@@ -93,7 +83,7 @@
 
     for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
     {
-        int partL = lumaPartitionsFromSquareBlocksTable[i];
+        int partL = partitionFromLog2Size(i + 2);
         p.square_copy_pp[i] = p.luma_copy_pp[partL];
         p.square_copy_ps[i] = p.luma_copy_ps[partL];
         p.square_copy_sp[i] = p.luma_copy_sp[partL];
diff -r d303b4d860e9 -r b2ad081e4bfc source/common/primitives.h
--- a/source/common/primitives.h	Mon Jul 21 22:43:38 2014 -0500
+++ b/source/common/primitives.h	Tue Jul 22 15:53:10 2014 +0900
@@ -36,12 +36,18 @@
 // x265 private namespace
 
 enum LumaPartitions
-{ // Square     Rectangular             Asymmetrical (0.75, 0.25)
-    LUMA_4x4,
-    LUMA_8x8,   LUMA_8x4,   LUMA_4x8,
-    LUMA_16x16, LUMA_16x8,  LUMA_8x16,  LUMA_16x12, LUMA_12x16, LUMA_16x4,  LUMA_4x16,
-    LUMA_32x32, LUMA_32x16, LUMA_16x32, LUMA_32x24, LUMA_24x32, LUMA_32x8,  LUMA_8x32,
-    LUMA_64x64, LUMA_64x32, LUMA_32x64, LUMA_64x48, LUMA_48x64, LUMA_64x16, LUMA_16x64,
+{
+    // Square
+    LUMA_4x4,   LUMA_8x8,   LUMA_16x16, LUMA_32x32, LUMA_64x64,
+    // Rectangular
+    LUMA_8x4,   LUMA_4x8,
+    LUMA_16x8,  LUMA_8x16,  
+    LUMA_32x16, LUMA_16x32,
+    LUMA_64x32, LUMA_32x64,
+    // Asymmetrical (0.75, 0.25)
+    LUMA_16x12, LUMA_12x16, LUMA_16x4,  LUMA_4x16,
+    LUMA_32x24, LUMA_24x32, LUMA_32x8,  LUMA_8x32,
+    LUMA_64x48, LUMA_48x64, LUMA_64x16, LUMA_16x64,
     NUM_LUMA_PARTITIONS
 };
 
@@ -50,21 +56,27 @@
 // be indexed by the luma partition enum
 enum Chroma420Partitions
 {
-    CHROMA_2x2, // never used by HEVC
-    CHROMA_4x4,   CHROMA_4x2,   CHROMA_2x4,
-    CHROMA_8x8,   CHROMA_8x4,   CHROMA_4x8,   CHROMA_8x6,   CHROMA_6x8,   CHROMA_8x2,  CHROMA_2x8,
-    CHROMA_16x16, CHROMA_16x8,  CHROMA_8x16,  CHROMA_16x12, CHROMA_12x16, CHROMA_16x4, CHROMA_4x16,
-    CHROMA_32x32, CHROMA_32x16, CHROMA_16x32, CHROMA_32x24, CHROMA_24x32, CHROMA_32x8, CHROMA_8x32,
+    CHROMA_2x2,   CHROMA_4x4,   CHROMA_8x8,   CHROMA_16x16, CHROMA_32x32,
+    CHROMA_4x2,   CHROMA_2x4,
+    CHROMA_8x4,   CHROMA_4x8,
+    CHROMA_16x8,  CHROMA_8x16,
+    CHROMA_32x16, CHROMA_16x32,
+    CHROMA_8x6,   CHROMA_6x8,   CHROMA_8x2,  CHROMA_2x8,
+    CHROMA_16x12, CHROMA_12x16, CHROMA_16x4, CHROMA_4x16,
+    CHROMA_32x24, CHROMA_24x32, CHROMA_32x8, CHROMA_8x32,
     NUM_CHROMA_PARTITIONS
 };
 
 enum Chroma422Partitions
 {
-    CHROMA422X_4x8,
-    CHROMA422_4x8,   CHROMA422_4x4,   CHROMA422_2x8,
-    CHROMA422_8x16,  CHROMA422_8x8,   CHROMA422_4x16,  CHROMA422_8x12,  CHROMA422_6x16,  CHROMA422_8x4,   CHROMA422_2x16,
-    CHROMA422_16x32, CHROMA422_16x16, CHROMA422_8x32,  CHROMA422_16x24, CHROMA422_12x32, CHROMA422_16x8,  CHROMA422_4x32,
-    CHROMA422_32x64, CHROMA422_32x32, CHROMA422_16x64, CHROMA422_32x48, CHROMA422_24x64, CHROMA422_32x16, CHROMA422_8x64,
+    CHROMA422X_4x8,  CHROMA422_4x8,   CHROMA422_8x16,  CHROMA422_16x32, CHROMA422_32x64,
+    CHROMA422_4x4,   CHROMA422_2x8,
+    CHROMA422_8x8,   CHROMA422_4x16,
+    CHROMA422_16x16, CHROMA422_8x32,
+    CHROMA422_32x32, CHROMA422_16x64,
+    CHROMA422_8x12,  CHROMA422_6x16,  CHROMA422_8x4,   CHROMA422_2x16,
+    CHROMA422_16x24, CHROMA422_12x32, CHROMA422_16x8,  CHROMA422_4x32,
+    CHROMA422_32x48, CHROMA422_24x64, CHROMA422_32x16, CHROMA422_8x64,
     NUM_CHROMA_PARTITIONS422
 };
 
@@ -111,20 +123,10 @@
     return part;
 }
 
-inline int partitionFromSize(int size)
-{
-    X265_CHECK((size & ~(4 | 8 | 16 | 32 | 64)) == 0, "Invalid block size\n");
-    extern const uint8_t lumaSquarePartitionMapTable[];
-    int part = (int)lumaSquarePartitionMapTable[(size >> 2) - 1];
-    X265_CHECK(part != 255, "Invalid block size %d\n", size);
-    return part;
-}
-
 inline int partitionFromLog2Size(int log2Size)
 {
     X265_CHECK(2 <= log2Size && log2Size <= 6, "Invalid block size\n");
-    extern const uint8_t lumaPartitionsFromSquareBlocksTable[];
-    return (int)lumaPartitionsFromSquareBlocksTable[log2Size - 2];
+    return log2Size - 2;
 }
 
 typedef int  (*pixelcmp_t)(pixel *fenc, intptr_t fencstride, pixel *fref, intptr_t frefstride); // fenc is aligned
@@ -284,8 +286,8 @@
         filter_pp_t     filter_hpp[NUM_LUMA_PARTITIONS];
         filter_hps_t    filter_hps[NUM_LUMA_PARTITIONS];
         copy_pp_t       copy_pp[NUM_LUMA_PARTITIONS];
-        copy_sp_t       copy_sp[NUM_LUMA_PARTITIONS + 1];
-        copy_ps_t       copy_ps[NUM_LUMA_PARTITIONS + 1];
+        copy_sp_t       copy_sp[NUM_LUMA_PARTITIONS];
+        copy_ps_t       copy_ps[NUM_LUMA_PARTITIONS];
         copy_ss_t       copy_ss[NUM_LUMA_PARTITIONS];
         pixel_sub_ps_t  sub_ps[NUM_LUMA_PARTITIONS];
         pixel_add_ps_t  add_ps[NUM_LUMA_PARTITIONS];
@@ -302,6 +304,7 @@
 void Setup_C_Primitives(EncoderPrimitives &p);
 void Setup_Instrinsic_Primitives(EncoderPrimitives &p, int cpuMask);
 void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask);
+void Setup_Alias_Primitives(EncoderPrimitives &p);
 }
 
 #endif // ifndef X265_PRIMITIVES_H
diff -r d303b4d860e9 -r b2ad081e4bfc source/common/shortyuv.cpp
--- a/source/common/shortyuv.cpp	Mon Jul 21 22:43:38 2014 -0500
+++ b/source/common/shortyuv.cpp	Tue Jul 22 15:53:10 2014 +0900
@@ -103,22 +103,6 @@
     primitives.chroma[m_csp].sub_ps[part](getCrAddr(), m_cwidth, srcV0, srcV1, srcYuv0->getCStride(), srcYuv1->getCStride());
 }
 
-void ShortYuv::addClip(ShortYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize)
-{
-    int16_t* srcY0 = srcYuv0->getLumaAddr();
-    int16_t* srcY1 = srcYuv1->getLumaAddr();
-
-    primitives.pixeladd_ss(partSize, partSize, getLumaAddr(), m_width, srcY0, srcY1, srcYuv0->m_width, srcYuv1->m_width);
-
-    uint32_t cpartSize = partSize >> m_hChromaShift;
-    int16_t* srcU0 = srcYuv0->getCbAddr();
-    int16_t* srcU1 = srcYuv1->getCbAddr();
-    int16_t* srcV0 = srcYuv0->getCrAddr();
-    int16_t* srcV1 = srcYuv1->getCrAddr();
-    primitives.pixeladd_ss(cpartSize, cpartSize, getCbAddr(), m_cwidth, srcU0, srcU1, srcYuv0->m_cwidth, srcYuv1->m_cwidth);
-    primitives.pixeladd_ss(cpartSize, cpartSize, getCrAddr(), m_cwidth, srcV0, srcV1, srcYuv0->m_cwidth, srcYuv1->m_cwidth);
-}
-
 void ShortYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t log2Size)
 {
     int16_t* src = getLumaAddr(partIdx);
@@ -161,16 +145,3 @@
     primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
     primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
 }
-
-void ShortYuv::copyPartToPartShortChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId)
-{
-    X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
-
-    int part = partitionFromSize(lumaSize);
-
-    int16_t* src = getChromaAddr(chromaId, partIdx);
-    int16_t* dst = dstPicYuv->getChromaAddr(chromaId, partIdx);
-    uint32_t srcStride = m_cwidth;
-    uint32_t dstStride = dstPicYuv->m_cwidth;
-    primitives.chroma[m_csp].copy_ss[part](dst, dstStride, src, srcStride);
-}
diff -r d303b4d860e9 -r b2ad081e4bfc source/common/shortyuv.h
--- a/source/common/shortyuv.h	Mon Jul 21 22:43:38 2014 -0500
+++ b/source/common/shortyuv.h	Tue Jul 22 15:53:10 2014 +0900
@@ -90,12 +90,9 @@
     int16_t* getChromaAddr(uint32_t chromaId, uint32_t partUnitIdx) { return m_buf[chromaId] + getChromaAddrOffset(partUnitIdx, m_cwidth); }
 
     void subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t log2Size);
-    void addClip(ShortYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize);
 
     void copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t log2Size);
     void copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t log2SizeL);
-    void copyPartToPartShortChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId);
-
     void copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t log2Size);
     void copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t log2SizeL);
 
diff -r d303b4d860e9 -r b2ad081e4bfc source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Mon Jul 21 22:43:38 2014 -0500
+++ b/source/encoder/analysis.cpp	Tue Jul 22 15:53:10 2014 +0900
@@ -1847,6 +1847,7 @@
 
     if (lcu->getPredictionMode(absPartIdx) == MODE_INTER)
     {
+        int part = partitionFromLog2Size(cu->getLog2CUSize(0));
         if (!lcu->getSkipFlag(absPartIdx))
         {
             // Calculate Residue
@@ -1856,7 +1857,6 @@
             uint32_t src2stride = m_bestPredYuv[0]->getStride();
             uint32_t src1stride = m_origYuv[0]->getStride();
             uint32_t dststride = m_tmpResiYuv[depth]->m_width;
-            int part = partitionFromLog2Size(cu->getLog2CUSize(0));
             primitives.luma_sub_ps[part](dst, dststride, src1, src2, src1stride, src2stride);
 
             src2 = m_bestPredYuv[0]->getCbAddr(absPartIdx);
@@ -1915,7 +1915,6 @@
 
         // Generate Recon
         TComPicYuv* rec = pic->getPicYuvRec();
-        int part = partitionFromLog2Size(cu->getLog2CUSize(0));
         pixel* src = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
         pixel* dst = rec->getLumaAddr(cu->getAddr(), absPartIdx);
         uint32_t srcstride = m_bestPredYuv[0]->getStride();
diff -r d303b4d860e9 -r b2ad081e4bfc source/test/testbench.cpp
--- a/source/test/testbench.cpp	Mon Jul 21 22:43:38 2014 -0500
+++ b/source/test/testbench.cpp	Tue Jul 22 15:53:10 2014 +0900
@@ -127,6 +127,7 @@
     EncoderPrimitives cprim;
     memset(&cprim, 0, sizeof(EncoderPrimitives));
     Setup_C_Primitives(cprim);
+    Setup_Alias_Primitives(cprim);
 
     struct test_arch_t
     {
@@ -186,6 +187,7 @@
     memset(&optprim, 0, sizeof(optprim));
     Setup_Instrinsic_Primitives(optprim, cpuid);
     Setup_Assembly_Primitives(optprim, cpuid);
+    Setup_Alias_Primitives(optprim);
 
     printf("\nTest performance improvement with full optimizations\n");
 


More information about the x265-devel mailing list