[x265] [PATCH V2] rext: partial support for 4:2:2 and 4:4:4 color spaces

Steve Borho steve at borho.org
Tue Nov 5 23:01:34 CET 2013


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1383684790 21600
#      Tue Nov 05 14:53:10 2013 -0600
# Node ID dace992d1d66e2c4eaaef996fd35d44b42798450
# Parent  695e69ec99dbd4894b5703847496d367bf6d17ef
rext: partial support for 4:2:2 and 4:4:4 color spaces

reconFileBitDepth did not need to be a member of x265_param, and the comment
for inputBitDepth needed to mention that it also determined the internal bit
depth of the encoder.

diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibCommon/TComDataCU.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -100,13 +100,18 @@
     m_mvpIdx[1] = NULL;
     m_mvpNum[0] = NULL;
     m_mvpNum[1] = NULL;
+    m_chromaFormat = 0;
 }
 
 TComDataCU::~TComDataCU()
 {}
 
-void TComDataCU::create(uint32_t numPartition, uint32_t width, uint32_t height, int unitSize)
+void TComDataCU::create(uint32_t numPartition, uint32_t width, uint32_t height, int unitSize, int csp)
 {
+    m_hChromaShift = CHROMA_H_SHIFT(csp);
+    m_vChromaShift = CHROMA_V_SHIFT(csp);
+    m_chromaFormat = csp;
+
     m_pic           = NULL;
     m_slice         = NULL;
     m_numPartitions = numPartition;
@@ -151,13 +156,14 @@
     m_mvpNum[1] = new char[numPartition];
 
     m_trCoeffY  = (TCoeff*)X265_MALLOC(TCoeff, width * height);
-    m_trCoeffCb = (TCoeff*)X265_MALLOC(TCoeff, width * height / 4);
-    m_trCoeffCr = (TCoeff*)X265_MALLOC(TCoeff, width * height / 4);
+    m_trCoeffCb = (TCoeff*)X265_MALLOC(TCoeff, (width >> m_hChromaShift) * (height >> m_vChromaShift));
+    m_trCoeffCr = (TCoeff*)X265_MALLOC(TCoeff, (width >> m_hChromaShift) * (height >> m_vChromaShift));
+
 
     m_iPCMFlags   = (bool*)X265_MALLOC(bool, numPartition);
     m_iPCMSampleY  = (Pel*)X265_MALLOC(Pel, width * height);
-    m_iPCMSampleCb = (Pel*)X265_MALLOC(Pel, width * height / 4);
-    m_iPCMSampleCr = (Pel*)X265_MALLOC(Pel, width * height / 4);
+    m_iPCMSampleCb = (Pel*)X265_MALLOC(Pel, (width >> m_hChromaShift) * (height >> m_vChromaShift));
+    m_iPCMSampleCr = (Pel*)X265_MALLOC(Pel, (width >> m_hChromaShift) * (height >> m_vChromaShift));
 
     m_cuMvField[0].create(numPartition);
     m_cuMvField[1].create(numPartition);
@@ -311,31 +317,32 @@
         memset(m_iPCMFlags        + firstElement, false,                    numElements * sizeof(*m_iPCMFlags));
     }
 
-    uint32_t tmp = g_maxCUWidth * g_maxCUHeight;
+    uint32_t y_tmp = g_maxCUWidth * g_maxCUHeight;
+    uint32_t c_tmp = (g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift);
     if (0 >= partStartIdx)
     {
         m_cuMvField[0].clearMvField();
         m_cuMvField[1].clearMvField();
-        memset(m_trCoeffY, 0, sizeof(TCoeff) * tmp);
-        memset(m_iPCMSampleY, 0, sizeof(Pel) * tmp);
-        tmp  >>= 2;
-        memset(m_trCoeffCb, 0, sizeof(TCoeff) * tmp);
-        memset(m_trCoeffCr, 0, sizeof(TCoeff) * tmp);
-        memset(m_iPCMSampleCb, 0, sizeof(Pel) * tmp);
-        memset(m_iPCMSampleCr, 0, sizeof(Pel) * tmp);
+        memset(m_trCoeffY, 0, sizeof(TCoeff) * y_tmp);
+        memset(m_iPCMSampleY, 0, sizeof(Pel) * y_tmp);
+
+        memset(m_trCoeffCb, 0, sizeof(TCoeff) * c_tmp);
+        memset(m_trCoeffCr, 0, sizeof(TCoeff) * c_tmp);
+        memset(m_iPCMSampleCb, 0, sizeof(Pel) * c_tmp);
+        memset(m_iPCMSampleCr, 0, sizeof(Pel) * c_tmp);
     }
     else
     {
         TComDataCU * from = pic->getCU(getAddr());
         m_cuMvField[0].copyFrom(&from->m_cuMvField[0], m_numPartitions, 0);
         m_cuMvField[1].copyFrom(&from->m_cuMvField[1], m_numPartitions, 0);
-        for (int i = 0; i < tmp; i++)
+        for (int i = 0; i < y_tmp; i++)
         {
             m_trCoeffY[i] = from->m_trCoeffY[i];
             m_iPCMSampleY[i] = from->m_iPCMSampleY[i];
         }
 
-        for (int i = 0; i < (tmp >> 2); i++)
+        for (int i = 0; i < c_tmp; i++)
         {
             m_trCoeffCb[i] = from->m_trCoeffCb[i];
             m_trCoeffCr[i] = from->m_trCoeffCr[i];
@@ -430,22 +437,18 @@
         m_cbf[2][i] = 0;
     }
 
-    uint32_t uiTmp = width * height;
-
-    {
-        m_cuMvField[0].clearMvField();
-        m_cuMvField[1].clearMvField();
-        uiTmp = width * height;
-
-        memset(m_trCoeffY,    0, uiTmp * sizeof(*m_trCoeffY));
-        memset(m_iPCMSampleY, 0, uiTmp * sizeof(*m_iPCMSampleY));
-
-        uiTmp >>= 2;
-        memset(m_trCoeffCb,    0, uiTmp * sizeof(*m_trCoeffCb));
-        memset(m_trCoeffCr,    0, uiTmp * sizeof(*m_trCoeffCr));
-        memset(m_iPCMSampleCb, 0, uiTmp * sizeof(*m_iPCMSampleCb));
-        memset(m_iPCMSampleCr, 0, uiTmp * sizeof(*m_iPCMSampleCr));
-    }
+    m_cuMvField[0].clearMvField();
+    m_cuMvField[1].clearMvField();
+
+    uint32_t tmp = width * height;
+    memset(m_trCoeffY,    0, tmp * sizeof(*m_trCoeffY));
+    memset(m_iPCMSampleY, 0, tmp * sizeof(*m_iPCMSampleY));
+
+    tmp = (width >> m_hChromaShift) * (height >> m_vChromaShift);
+    memset(m_trCoeffCb,    0, tmp * sizeof(*m_trCoeffCb));
+    memset(m_trCoeffCr,    0, tmp * sizeof(*m_trCoeffCr));
+    memset(m_iPCMSampleCb, 0, tmp * sizeof(*m_iPCMSampleCb));
+    memset(m_iPCMSampleCr, 0, tmp * sizeof(*m_iPCMSampleCr));
 }
 
 // initialize Sub partition
@@ -514,11 +517,13 @@
     uint32_t tmp = width * heigth;
     memset(m_trCoeffY, 0, sizeof(TCoeff) * tmp);
     memset(m_iPCMSampleY, 0, sizeof(Pel) * tmp);
-    tmp >>= 2;
+
+    tmp = (width >> m_hChromaShift) * (heigth >> m_vChromaShift);
     memset(m_trCoeffCb, 0, sizeof(TCoeff) * tmp);
     memset(m_trCoeffCr, 0, sizeof(TCoeff) * tmp);
     memset(m_iPCMSampleCb, 0, sizeof(Pel) * tmp);
     memset(m_iPCMSampleCr, 0, sizeof(Pel) * tmp);
+
     m_cuMvField[0].clearMvField();
     m_cuMvField[1].clearMvField();
 
@@ -594,17 +599,17 @@
     m_cuMvField[0].copyFrom(cu->getCUMvField(REF_PIC_LIST_0), cu->getTotalNumPart(), offset);
     m_cuMvField[1].copyFrom(cu->getCUMvField(REF_PIC_LIST_1), cu->getTotalNumPart(), offset);
 
-    uint32_t uiTmp  = g_maxCUWidth * g_maxCUHeight >> (depth << 1);
-    uint32_t uiTmp2 = partUnitIdx * uiTmp;
-    memcpy(m_trCoeffY  + uiTmp2, cu->getCoeffY(),  sizeof(TCoeff) * uiTmp);
-    memcpy(m_iPCMSampleY + uiTmp2, cu->getPCMSampleY(), sizeof(Pel) * uiTmp);
-
-    uiTmp >>= 2;
-    uiTmp2 >>= 2;
-    memcpy(m_trCoeffCb + uiTmp2, cu->getCoeffCb(), sizeof(TCoeff) * uiTmp);
-    memcpy(m_trCoeffCr + uiTmp2, cu->getCoeffCr(), sizeof(TCoeff) * uiTmp);
-    memcpy(m_iPCMSampleCb + uiTmp2, cu->getPCMSampleCb(), sizeof(Pel) * uiTmp);
-    memcpy(m_iPCMSampleCr + uiTmp2, cu->getPCMSampleCr(), sizeof(Pel) * uiTmp);
+    uint32_t tmp  = g_maxCUWidth * g_maxCUHeight >> (depth << 1);
+    uint32_t tmp2 = partUnitIdx * tmp;
+    memcpy(m_trCoeffY  + tmp2, cu->getCoeffY(),  sizeof(TCoeff) * tmp);
+    memcpy(m_iPCMSampleY + tmp2, cu->getPCMSampleY(), sizeof(Pel) * tmp);
+
+    tmp  = (g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift) >> (depth << 1);
+    tmp2 = partUnitIdx * tmp;
+    memcpy(m_trCoeffCb + tmp2, cu->getCoeffCb(), sizeof(TCoeff) * tmp);
+    memcpy(m_trCoeffCr + tmp2, cu->getCoeffCr(), sizeof(TCoeff) * tmp);
+    memcpy(m_iPCMSampleCb + tmp2, cu->getPCMSampleCb(), sizeof(Pel) * tmp);
+    memcpy(m_iPCMSampleCr + tmp2, cu->getPCMSampleCr(), sizeof(Pel) * tmp);
 }
 
 // Copy current predicted part to a CU in picture.
@@ -662,8 +667,8 @@
     memcpy(rpcCU->getCoeffY()     + tmp2, m_trCoeffY,    sizeof(TCoeff) * tmp);
     memcpy(rpcCU->getPCMSampleY() + tmp2, m_iPCMSampleY, sizeof(Pel) * tmp);
 
-    tmp >>= 2;
-    tmp2 >>= 2;
+    tmp  = ((g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift)) >> (uhDepth << 1); // chroma element count: height uses the vertical shift
+    tmp2 = m_absIdxInLCU * (m_pic->getMinCUWidth() >> m_hChromaShift) * (m_pic->getMinCUHeight() >> m_vChromaShift);
     memcpy(rpcCU->getCoeffCb() + tmp2, m_trCoeffCb, sizeof(TCoeff) * tmp);
     memcpy(rpcCU->getCoeffCr() + tmp2, m_trCoeffCr, sizeof(TCoeff) * tmp);
     memcpy(rpcCU->getPCMSampleCb() + tmp2, m_iPCMSampleCb, sizeof(Pel) * tmp);
@@ -723,8 +728,8 @@
     memcpy(cu->getCoeffY()  + tmp2, m_trCoeffY,  sizeof(TCoeff) * tmp);
     memcpy(cu->getPCMSampleY() + tmp2, m_iPCMSampleY, sizeof(Pel) * tmp);
 
-    tmp >>= 2;
-    tmp2 >>= 2;
+    tmp  = ((g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift)) >> ((depth + partDepth) << 1);
+    tmp2 = partOffset * (m_pic->getMinCUWidth() >> m_hChromaShift) * (m_pic->getMinCUHeight() >> m_vChromaShift);
     memcpy(cu->getCoeffCb() + tmp2, m_trCoeffCb, sizeof(TCoeff) * tmp);
     memcpy(cu->getCoeffCr() + tmp2, m_trCoeffCr, sizeof(TCoeff) * tmp);
     memcpy(cu->getPCMSampleCb() + tmp2, m_iPCMSampleCb, sizeof(Pel) * tmp);
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibCommon/TComDataCU.h	Tue Nov 05 14:53:10 2013 -0600
@@ -92,46 +92,49 @@
     // CU description
     // -------------------------------------------------------------------------------------------------------------------
 
-    uint32_t          m_cuAddr;         ///< CU address in a slice
-    uint32_t          m_absIdxInLCU;    ///< absolute address in a CU. It's Z scan order
-    uint32_t          m_cuPelX;         ///< CU position in a pixel (X)
-    uint32_t          m_cuPelY;         ///< CU position in a pixel (Y)
-    uint32_t          m_numPartitions;   ///< total number of minimum partitions in a CU
-    UChar*        m_width;         ///< array of widths
-    UChar*        m_height;        ///< array of heights
-    UChar*        m_depth;         ///< array of depths
-    int           m_unitSize;         ///< size of a "minimum partition"
-    uint32_t          m_unitMask;       ///< mask for mapping index to CompressMV field
+    uint32_t      m_cuAddr;          ///< CU address in a slice
+    uint32_t      m_absIdxInLCU;     ///< absolute address in a CU. It's Z scan order
+    uint32_t      m_cuPelX;          ///< CU position in a pixel (X)
+    uint32_t      m_cuPelY;          ///< CU position in a pixel (Y)
+    uint32_t      m_numPartitions;   ///< total number of minimum partitions in a CU
+    UChar*        m_width;           ///< array of widths
+    UChar*        m_height;          ///< array of heights
+    UChar*        m_depth;           ///< array of depths
+    int           m_chromaFormat;
+    int           m_hChromaShift;
+    int           m_vChromaShift;
+    int           m_unitSize;        ///< size of a "minimum partition"
+    uint32_t      m_unitMask;        ///< mask for mapping index to CompressMV field
 
     // -------------------------------------------------------------------------------------------------------------------
     // CU data
     // -------------------------------------------------------------------------------------------------------------------
-    bool*         m_skipFlag;         ///< array of skip flags
-    char*         m_partSizes;       ///< array of partition sizes
-    char*         m_predModes;       ///< array of prediction modes
+    bool*         m_skipFlag;           ///< array of skip flags
+    char*         m_partSizes;          ///< array of partition sizes
+    char*         m_predModes;          ///< array of prediction modes
     bool*         m_cuTransquantBypass; ///< array of cu_transquant_bypass flags
-    char*         m_qp;             ///< array of QP values
-    UChar*        m_trIdx;         ///< array of transform indices
-    UChar*        m_transformSkip[3]; ///< array of transform skipping flags
-    UChar*        m_cbf[3];        ///< array of coded block flags (CBF)
-    TComCUMvField m_cuMvField[2];   ///< array of motion vectors
-    TCoeff*       m_trCoeffY;       ///< transformed coefficient buffer (Y)
-    TCoeff*       m_trCoeffCb;      ///< transformed coefficient buffer (Cb)
-    TCoeff*       m_trCoeffCr;      ///< transformed coefficient buffer (Cr)
+    char*         m_qp;                 ///< array of QP values
+    UChar*        m_trIdx;              ///< array of transform indices
+    UChar*        m_transformSkip[3];   ///< array of transform skipping flags
+    UChar*        m_cbf[3];             ///< array of coded block flags (CBF)
+    TComCUMvField m_cuMvField[2];       ///< array of motion vectors
+    TCoeff*       m_trCoeffY;           ///< transformed coefficient buffer (Y)
+    TCoeff*       m_trCoeffCb;          ///< transformed coefficient buffer (Cb)
+    TCoeff*       m_trCoeffCr;          ///< transformed coefficient buffer (Cr)
 
-    Pel*          m_iPCMSampleY;    ///< PCM sample buffer (Y)
-    Pel*          m_iPCMSampleCb;   ///< PCM sample buffer (Cb)
-    Pel*          m_iPCMSampleCr;   ///< PCM sample buffer (Cr)
+    Pel*          m_iPCMSampleY;        ///< PCM sample buffer (Y)
+    Pel*          m_iPCMSampleCb;       ///< PCM sample buffer (Cb)
+    Pel*          m_iPCMSampleCr;       ///< PCM sample buffer (Cr)
 
     // -------------------------------------------------------------------------------------------------------------------
     // neighbor access variables
     // -------------------------------------------------------------------------------------------------------------------
 
-    TComDataCU*   m_cuAboveLeft;    ///< pointer of above-left CU
-    TComDataCU*   m_cuAboveRight;   ///< pointer of above-right CU
-    TComDataCU*   m_cuAbove;        ///< pointer of above CU
-    TComDataCU*   m_cuLeft;         ///< pointer of left CU
-    TComDataCU*   m_cuColocated[2]; ///< pointer of temporally colocated CU's for both directions
+    TComDataCU*   m_cuAboveLeft;     ///< pointer of above-left CU
+    TComDataCU*   m_cuAboveRight;    ///< pointer of above-right CU
+    TComDataCU*   m_cuAbove;         ///< pointer of above CU
+    TComDataCU*   m_cuLeft;          ///< pointer of left CU
+    TComDataCU*   m_cuColocated[2];  ///< pointer of temporally colocated CU's for both directions
     TComMvField   m_mvFieldA;        ///< motion vector of position A
     TComMvField   m_mvFieldB;        ///< motion vector of position B
     TComMvField   m_mvFieldC;        ///< motion vector of position C
@@ -141,14 +144,14 @@
     // -------------------------------------------------------------------------------------------------------------------
 
     bool*         m_bMergeFlags;      ///< array of merge flags
-    UChar*        m_mergeIndex;    ///< array of merge candidate indices
+    UChar*        m_mergeIndex;       ///< array of merge candidate indices
     bool          m_bIsMergeAMP;
-    UChar*        m_lumaIntraDir;  ///< array of intra directions (luma)
-    UChar*        m_chromaIntraDir; ///< array of intra directions (chroma)
-    UChar*        m_interDir;      ///< array of inter directions
-    char*         m_mvpIdx[2];     ///< array of motion vector predictor candidates
-    char*         m_mvpNum[2];     ///< array of number of possible motion vectors predictors
-    bool*         m_iPCMFlags;       ///< array of intra_pcm flags
+    UChar*        m_lumaIntraDir;     ///< array of intra directions (luma)
+    UChar*        m_chromaIntraDir;   ///< array of intra directions (chroma)
+    UChar*        m_interDir;         ///< array of inter directions
+    char*         m_mvpIdx[2];        ///< array of motion vector predictor candidates
+    char*         m_mvpNum[2];        ///< array of number of possible motion vectors predictors
+    bool*         m_iPCMFlags;        ///< array of intra_pcm flags
 
     // -------------------------------------------------------------------------------------------------------------------
     // misc. variables
@@ -185,7 +188,7 @@
     // create / destroy / initialize / copy
     // -------------------------------------------------------------------------------------------------------------------
 
-    void          create(uint32_t numPartition, uint32_t width, uint32_t height, int unitSize);
+    void          create(uint32_t numPartition, uint32_t width, uint32_t height, int unitSize, int csp);
     void          destroy();
 
     void          initCU(TComPic* pic, uint32_t cuAddr);
@@ -486,7 +489,7 @@
     // member functions for symbol prediction (most probable / mode conversion)
     // -------------------------------------------------------------------------------------------------------------------
 
-    uint32_t          getIntraSizeIdx(uint32_t absPartIdx);
+    uint32_t      getIntraSizeIdx(uint32_t absPartIdx);
 
     void          getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList);
     void          getIntraDirLumaPredictor(uint32_t absPartIdx, int32_t* intraDirPred, int32_t* mode = NULL);
@@ -495,19 +498,26 @@
     // member functions for SBAC context
     // -------------------------------------------------------------------------------------------------------------------
 
-    uint32_t          getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth);
-    uint32_t          getCtxQtCbf(TextType ttype, uint32_t trDepth);
+    uint32_t      getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth);
+    uint32_t      getCtxQtCbf(TextType ttype, uint32_t trDepth);
 
-    uint32_t          getCtxSkipFlag(uint32_t absPartIdx);
-    uint32_t          getCtxInterDir(uint32_t absPartIdx);
+    uint32_t      getCtxSkipFlag(uint32_t absPartIdx);
+    uint32_t      getCtxInterDir(uint32_t absPartIdx);
 
     // -------------------------------------------------------------------------------------------------------------------
     // member functions for RD cost storage
     // -------------------------------------------------------------------------------------------------------------------
 
-    uint32_t&         getTotalNumPart()               { return m_numPartitions; }
+    uint32_t&     getTotalNumPart()               { return m_numPartitions; }
 
-    uint32_t          getCoefScanIdx(uint32_t absPartIdx, uint32_t width, bool bIsLuma, bool bIsIntra);
+    uint32_t      getCoefScanIdx(uint32_t absPartIdx, uint32_t width, bool bIsLuma, bool bIsIntra);
+
+    // -------------------------------------------------------------------------------------------------------------------
+    // member functions to support multiple color space formats
+    // -------------------------------------------------------------------------------------------------------------------
+
+    int           getHorzChromaShift()  { return m_hChromaShift; }
+    int           getVertChromaShift()  { return m_vChromaShift; }
 };
 
 namespace RasterAddress {
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibCommon/TComPattern.cpp
--- a/source/Lib/TLibCommon/TComPattern.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibCommon/TComPattern.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -137,6 +137,9 @@
     UChar width        = cu->getWidth(0) >> partDepth;
     UChar height       = cu->getHeight(0) >> partDepth;
 
+    int hChromaShift = cu->getHorzChromaShift();
+    int vChromaShift = cu->getVertChromaShift();
+
     uint32_t absZOrderIdx  = cu->getZorderIdxInCU() + absPartIdx;
     uint32_t uiCurrPicPelX = cu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absZOrderIdx]];
     uint32_t uiCurrPicPelY = cu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absZOrderIdx]];
@@ -152,8 +155,8 @@
     }
 
     m_patternY.setPatternParamCU(cu, 0, width,      height,      offsetLeft, offsetAbove, absPartIdx);
-    m_patternCb.setPatternParamCU(cu, 1, width >> 1, height >> 1, offsetLeft, offsetAbove, absPartIdx);
-    m_patternCr.setPatternParamCU(cu, 2, width >> 1, height >> 1, offsetLeft, offsetAbove, absPartIdx);
+    m_patternCb.setPatternParamCU(cu, 1, width >> hChromaShift, height >> vChromaShift, offsetLeft, offsetAbove, absPartIdx);
+    m_patternCr.setPatternParamCU(cu, 2, width >> hChromaShift, height >> vChromaShift, offsetLeft, offsetAbove, absPartIdx);
 }
 
 void TComPattern::initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, Pel* adiBuf,
@@ -333,8 +336,8 @@
     cu->deriveLeftRightTopIdxAdi(partIdxLT, partIdxRT, zOrderIdxInPart, partDepth);
     cu->deriveLeftBottomIdxAdi(partIdxLB,              zOrderIdxInPart, partDepth);
 
-    unitSize      = (g_maxCUWidth >> g_maxCUDepth) >> 1; // for chroma
-    numUnitsInCU  = (cuWidth / unitSize) >> 1;          // for chroma
+    unitSize      = (g_maxCUWidth >> g_maxCUDepth) >> cu->getHorzChromaShift(); // for chroma
+    numUnitsInCU  = (cuWidth / unitSize) >> cu->getHorzChromaShift();           // for chroma
     totalUnits    = (numUnitsInCU << 2) + 1;
 
     bNeighborFlags[numUnitsInCU * 2] = isAboveLeftAvailable(cu, partIdxLT);
@@ -344,8 +347,8 @@
     numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, bNeighborFlags + (numUnitsInCU * 2) - 1);
     numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, bNeighborFlags + numUnitsInCU   - 1);
 
-    cuWidth = cuWidth >> 1; // for chroma
-    cuHeight = cuHeight >> 1; // for chroma
+    cuWidth = cuWidth >> cu->getHorzChromaShift(); // for chroma
+    cuHeight = cuHeight >> cu->getVertChromaShift(); // for chroma
 
     width = cuWidth * 2 + 1;
     height = cuHeight * 2 + 1;
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibCommon/TComPic.cpp
--- a/source/Lib/TLibCommon/TComPic.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibCommon/TComPic.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -77,13 +77,13 @@
 void TComPic::create(TEncCfg* cfg)
 {
     m_picSym = new TComPicSym;
-    m_picSym->create(cfg->param.sourceWidth, cfg->param.sourceHeight, g_maxCUWidth, g_maxCUHeight, g_maxCUDepth);
+    m_picSym->create(cfg->param.sourceWidth, cfg->param.sourceHeight, cfg->getColorFormat(), g_maxCUWidth, g_maxCUHeight, g_maxCUDepth);
 
     m_origPicYuv = new TComPicYuv;
-    m_origPicYuv->create(cfg->param.sourceWidth, cfg->param.sourceHeight, g_maxCUWidth, g_maxCUHeight, g_maxCUDepth);
+    m_origPicYuv->create(cfg->param.sourceWidth, cfg->param.sourceHeight, cfg->getColorFormat(), g_maxCUWidth, g_maxCUHeight, g_maxCUDepth);
 
     m_reconPicYuv = new TComPicYuv;
-    m_reconPicYuv->create(cfg->param.sourceWidth, cfg->param.sourceHeight, g_maxCUWidth, g_maxCUHeight, g_maxCUDepth);
+    m_reconPicYuv->create(cfg->param.sourceWidth, cfg->param.sourceHeight, cfg->getColorFormat(), g_maxCUWidth, g_maxCUHeight, g_maxCUDepth);
 
     /* store conformance window parameters with picture */
     m_conformanceWindow = cfg->m_conformanceWindow;
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibCommon/TComPicSym.cpp
--- a/source/Lib/TLibCommon/TComPicSym.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibCommon/TComPicSym.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -63,7 +63,7 @@
     , m_cuData(NULL)
 {}
 
-void TComPicSym::create(int picWidth, int picHeight, uint32_t maxWidth, uint32_t maxHeight, uint32_t maxDepth)
+void TComPicSym::create(int picWidth, int picHeight, int picCsp, uint32_t maxWidth, uint32_t maxHeight, uint32_t maxDepth)
 {
     uint32_t i;
 
@@ -89,7 +89,7 @@
     for (i = 0; i < m_numCUsInFrame; i++)
     {
         m_cuData[i] = new TComDataCU;
-        m_cuData[i]->create(m_numPartitions, m_maxCUWidth, m_maxCUHeight, m_maxCUWidth >> m_totalDepth);
+        m_cuData[i]->create(m_numPartitions, m_maxCUWidth, m_maxCUHeight, m_maxCUWidth >> m_totalDepth, picCsp);
     }
 
     m_saoParam = NULL;
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibCommon/TComPicSym.h
--- a/source/Lib/TLibCommon/TComPicSym.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibCommon/TComPicSym.h	Tue Nov 05 14:53:10 2013 -0600
@@ -82,7 +82,7 @@
 
 public:
 
-    void        create(int picWidth, int picHeight, uint32_t maxWidth, uint32_t maxHeight, uint32_t maxDepth);
+    void        create(int picWidth, int picHeight, int picCsp, uint32_t maxWidth, uint32_t maxHeight, uint32_t maxDepth);
     void        destroy();
 
     TComPicSym();
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibCommon/TComPicYuv.cpp
--- a/source/Lib/TLibCommon/TComPicYuv.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -63,10 +63,13 @@
 {
 }
 
-void TComPicYuv::create(int picWidth, int picHeight, uint32_t maxCUWidth, uint32_t maxCUHeight, uint32_t maxCUDepth)
+void TComPicYuv::create(int picWidth, int picHeight, int picCsp, uint32_t maxCUWidth, uint32_t maxCUHeight, uint32_t maxCUDepth)
 {
     m_picWidth  = picWidth;
     m_picHeight = picHeight;
+    m_hChromaShift = CHROMA_H_SHIFT(picCsp);
+    m_vChromaShift = CHROMA_V_SHIFT(picCsp);
+    m_picCsp = picCsp;
 
     // --> After config finished!
     m_cuWidth  = maxCUWidth;
@@ -76,12 +79,13 @@
     m_numCuInHeight = (m_picHeight + m_cuHeight - 1) / m_cuHeight;
 
     m_lumaMarginX = g_maxCUWidth  + 32; // search margin and 8-tap filter half-length, padded for 32-byte alignment
-    m_lumaMarginY = g_maxCUHeight + 16; // search margin plus 8 plus 8-tap filter half-length, rounded to 16
+    m_lumaMarginY = g_maxCUHeight + 16; // margin for 8-tap filter and infinite padding
     m_stride = (m_numCuInWidth * g_maxCUWidth) + (m_lumaMarginX << 1);
 
-    m_chromaMarginX = m_lumaMarginX;    // keep 16-byte alignment for chroma CTUs
-    m_chromaMarginY = m_lumaMarginY >> 1;
-    m_strideC = ((m_numCuInWidth * g_maxCUWidth) >> 1) + (m_chromaMarginX << 1);
+    m_chromaMarginX = m_lumaMarginX;       // keep 16-byte alignment for chroma CTUs
+    m_chromaMarginY = m_lumaMarginY >> m_vChromaShift;
+
+    m_strideC = ((m_numCuInWidth * g_maxCUWidth) >> m_hChromaShift) + (m_chromaMarginX * 2);
     int maxHeight = m_numCuInHeight * g_maxCUHeight;
 
     m_picBufY = (Pel*)X265_MALLOC(Pel, m_stride * (maxHeight + (m_lumaMarginY << 1)));
@@ -99,7 +103,7 @@
         for (int cuCol = 0; cuCol < m_numCuInWidth; cuCol++)
         {
             m_cuOffsetY[cuRow * m_numCuInWidth + cuCol] = getStride() * cuRow * m_cuHeight + cuCol * m_cuWidth;
-            m_cuOffsetC[cuRow * m_numCuInWidth + cuCol] = getCStride() * cuRow * (m_cuHeight / 2) + cuCol * (m_cuWidth / 2);
+            m_cuOffsetC[cuRow * m_numCuInWidth + cuCol] = getCStride() * cuRow * (m_cuHeight >> m_vChromaShift) + cuCol * (m_cuWidth >> m_hChromaShift);
         }
     }
 
@@ -110,7 +114,7 @@
         for (int buCol = 0; buCol < (1 << maxCUDepth); buCol++)
         {
             m_buOffsetY[(buRow << maxCUDepth) + buCol] = getStride() * buRow * (maxCUHeight >> maxCUDepth) + buCol * (maxCUWidth  >> maxCUDepth);
-            m_buOffsetC[(buRow << maxCUDepth) + buCol] = getCStride() * buRow * (maxCUHeight / 2 >> maxCUDepth) + buCol * (maxCUWidth / 2 >> maxCUDepth);
+            m_buOffsetC[(buRow << maxCUDepth) + buCol] = getCStride() * buRow * ((maxCUHeight >> m_vChromaShift) >> maxCUDepth) + buCol * ((maxCUWidth >> m_hChromaShift) >> maxCUDepth);
         }
     }
 }
@@ -196,8 +200,8 @@
     assert(m_picHeight == destPicYuv->getHeight());
 
     ::memcpy(destPicYuv->getBufY(), m_picBufY, sizeof(Pel) * (m_picWidth + (m_lumaMarginX << 1)) * (m_picHeight + (m_lumaMarginY << 1)));
-    ::memcpy(destPicYuv->getBufU(), m_picBufU, sizeof(Pel) * ((m_picWidth >> 1) + (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)));
-    ::memcpy(destPicYuv->getBufV(), m_picBufV, sizeof(Pel) * ((m_picWidth >> 1) + (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)));
+    ::memcpy(destPicYuv->getBufU(), m_picBufU, sizeof(Pel) * ((m_picWidth >> m_hChromaShift) + (m_chromaMarginX << 1)) * ((m_picHeight >> m_vChromaShift) + (m_chromaMarginY << 1)));
+    ::memcpy(destPicYuv->getBufV(), m_picBufV, sizeof(Pel) * ((m_picWidth >> m_hChromaShift) + (m_chromaMarginX << 1)) * ((m_picHeight >> m_vChromaShift) + (m_chromaMarginY << 1)));
 }
 
 void  TComPicYuv::copyToPicLuma(TComPicYuv* destPicYuv)
@@ -213,7 +217,7 @@
     assert(m_picWidth  == destPicYuv->getWidth());
     assert(m_picHeight == destPicYuv->getHeight());
 
-    ::memcpy(destPicYuv->getBufU(), m_picBufU, sizeof(Pel) * ((m_picWidth >> 1) + (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)));
+    ::memcpy(destPicYuv->getBufU(), m_picBufU, sizeof(Pel) * ((m_picWidth >> m_hChromaShift) + (m_chromaMarginX << 1)) * ((m_picHeight >> m_vChromaShift) + (m_chromaMarginY << 1)));
 }
 
 void  TComPicYuv::copyToPicCr(TComPicYuv* destPicYuv)
@@ -221,7 +225,7 @@
     assert(m_picWidth  == destPicYuv->getWidth());
     assert(m_picHeight == destPicYuv->getHeight());
 
-    ::memcpy(destPicYuv->getBufV(), m_picBufV, sizeof(Pel) * ((m_picWidth >> 1) + (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)));
+    ::memcpy(destPicYuv->getBufV(), m_picBufV, sizeof(Pel) * ((m_picWidth >> m_hChromaShift) + (m_chromaMarginX << 1)) * ((m_picHeight >> m_vChromaShift) + (m_chromaMarginY << 1)));
 }
 
 void TComPicYuv::xExtendPicCompBorder(Pel* recon, int stride, int width, int height, int iMarginX, int iMarginY)
@@ -291,25 +295,23 @@
     shift = X265_DEPTH - 8;
     offset = (shift > 0) ? (1 << (shift - 1)) : 0;
 
-    for (y = 0; y < m_picHeight >> 1; y++)
+    for (y = 0; y < m_picHeight >> m_vChromaShift; y++)
     {
-        for (x = 0; x < m_picWidth >> 1; x++)
+        for (x = 0; x < m_picWidth >> m_hChromaShift; x++)
         {
             uc = (UChar)Clip3<Pel>(0, 255, (pelCb[x] + offset) >> shift);
             fwrite(&uc, sizeof(UChar), 1, pFile);
         }
-
         pelCb += getCStride();
     }
 
-    for (y = 0; y < m_picHeight >> 1; y++)
+    for (y = 0; y < m_picHeight >> m_vChromaShift; y++)
     {
-        for (x = 0; x < m_picWidth >> 1; x++)
+        for (x = 0; x < m_picWidth >> m_hChromaShift; x++)
         {
             uc = (UChar)Clip3<Pel>(0, 255, (pelCr[x] + offset) >> shift);
             fwrite(&uc, sizeof(UChar), 1, pFile);
         }
-
         pelCr += getCStride();
     }
 
@@ -327,18 +329,16 @@
     Pel *Y = getLumaAddr();
     Pel *U = getCbAddr();
     Pel *V = getCrAddr();
-
-    uint8_t *y = (uint8_t*)pic.planes[0];
-    uint8_t *u = (uint8_t*)pic.planes[1];
-    uint8_t *v = (uint8_t*)pic.planes[2];
-
+        
     int padx = pad[0];
     int pady = pad[1];
 
 #if HIGH_BIT_DEPTH
-    if (sizeof(Pel) * 8 > pic.bitDepth)
+    if (pic.bitDepth > 8)
     {
-        assert(pic.bitDepth == 8);
+        uint16_t *y = (uint16_t*)pic.planes[0];
+        uint16_t *u = (uint16_t*)pic.planes[1];
+        uint16_t *v = (uint16_t*)pic.planes[2];
 
         /* width and height - without padsize */
         int width = m_picWidth - padx;
@@ -356,9 +356,9 @@
             y += pic.stride[0];
         }
 
-        for (int r = 0; r < height >> 1; r++)
+        for (int r = 0; r < height >> m_vChromaShift; r++)
         {
-            for (int c = 0; c < width >> 1; c++)
+            for (int c = 0; c < width >> m_hChromaShift; c++)
             {
                 U[c] = (Pel)u[c];
                 V[c] = (Pel)v[c];
@@ -385,12 +385,12 @@
                 Y += getStride();
             }
 
-            for (int r = 0; r < height >> 1; r++)
+            for (int r = 0; r < height >> m_vChromaShift; r++)
             {
-                for (int x = 0; x < padx >> 1; x++)
+                for (int x = 0; x < padx >> m_hChromaShift; x++)
                 {
-                    U[(width >> 1) + x] = U[(width >> 1) - 1];
-                    V[(width >> 1) + x] = V[(width >> 1) - 1];
+                    U[(width >> m_hChromaShift) + x] = U[(width >> m_hChromaShift) - 1];
+                    V[(width >> m_hChromaShift) + x] = V[(width >> m_hChromaShift) - 1];
                 }
                 U += getCStride();
                 V += getCStride();
@@ -402,22 +402,25 @@
         {
             width = m_picWidth;
             Y = getLumaAddr() + (height - 1) * getStride();
-            U = getCbAddr() + ((height >> 1) - 1) * getCStride();
-            V = getCrAddr() + ((height >> 1) - 1) * getCStride();
+            U = getCbAddr() + ((height >> m_vChromaShift) - 1) * getCStride();
+            V = getCrAddr() + ((height >> m_vChromaShift) - 1) * getCStride();
 
             for (uint32_t i = 1; i <= pady; i++)
                 memcpy(Y + i * getStride(), Y, width * sizeof(Pel));
 
-            for (uint32_t j = 1; j <= pady >> 1; j++)
+            for (uint32_t j = 1; j <= pady >> m_vChromaShift; j++)
             {
-                memcpy(U + j * getCStride(), U, (width >> 1) * sizeof(Pel));
-                memcpy(V + j * getCStride(), V, (width >> 1) * sizeof(Pel));
+                memcpy(U + j * getCStride(), U, (width >> m_hChromaShift) * sizeof(Pel));
+                memcpy(V + j * getCStride(), V, (width >> m_hChromaShift) * sizeof(Pel));
             }
         }
     }
     else
 #endif // if HIGH_BIT_DEPTH
     {
+        uint8_t *y = (uint8_t*)pic.planes[0];
+        uint8_t *u = (uint8_t*)pic.planes[1];
+        uint8_t *v = (uint8_t*)pic.planes[2];
 
         /* width and height - without padsize */
         int width = (m_picWidth * (pic.bitDepth > 8 ? 2 : 1)) - padx;
@@ -437,16 +440,16 @@
 
         }
 
-        for (int r = 0; r < height >> 1; r++)
+        for (int r = 0; r < height >> m_vChromaShift; r++)
         {
-            memcpy(U, u, width >> 1);
-            memcpy(V, v, width >> 1);
+            memcpy(U, u, width >> m_hChromaShift);
+            memcpy(V, v, width >> m_hChromaShift);
 
             /* extend the right if width is not multiple of the minimum CU size */
             if (padx)
             {
-                ::memset(U + (width >> 1), U[(width >> 1) - 1], padx >> 1);
-                ::memset(V + (width >> 1), V[(width >> 1) - 1], padx >> 1);
+                ::memset(U + (width >> m_hChromaShift), U[(width >> m_hChromaShift) - 1], padx >> m_hChromaShift);
+                ::memset(V + (width >> m_hChromaShift), V[(width >> m_hChromaShift) - 1], padx >> m_hChromaShift);
             }
 
             U += getCStride();
@@ -460,16 +463,16 @@
         {
             width = m_picWidth;
             Y = getLumaAddr() + (height - 1) * getStride();
-            U = getCbAddr() + ((height >> 1) - 1) * getCStride();
-            V = getCrAddr() + ((height >> 1) - 1) * getCStride();
+            U = getCbAddr() + ((height >> m_vChromaShift) - 1) * getCStride();
+            V = getCrAddr() + ((height >> m_vChromaShift) - 1) * getCStride();
 
             for (uint32_t i = 1; i <= pady; i++)
                 memcpy(Y + i * getStride(), Y, width * sizeof(pixel));
 
-            for (uint32_t j = 1; j <= pady >> 1; j++)
+            for (uint32_t j = 1; j <= pady >> m_vChromaShift; j++)
             {
-                memcpy(U + j * getCStride(), U, (width >> 1) * sizeof(pixel));
-                memcpy(V + j * getCStride(), V, (width >> 1) * sizeof(pixel));
+                memcpy(U + j * getCStride(), U, (width >> m_hChromaShift) * sizeof(pixel));
+                memcpy(V + j * getCStride(), V, (width >> m_hChromaShift) * sizeof(pixel));
             }
         }
     }
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibCommon/TComPicYuv.h	Tue Nov 05 14:53:10 2013 -0600
@@ -77,6 +77,9 @@
     // ------------------------------------------------------------------------------------------------
     int   m_picWidth;          ///< Width of picture
     int   m_picHeight;         ///< Height of picture
+    int   m_picCsp;            ///< Picture color format
+    int   m_hChromaShift;
+    int   m_vChromaShift;
 
     int   m_cuWidth;           ///< Width of Coding Unit (CU)
     int   m_cuHeight;          ///< Height of Coding Unit (CU)
@@ -104,7 +107,7 @@
     //  Memory management
     // ------------------------------------------------------------------------------------------------
 
-    void  create(int picWidth, int picHeight, uint32_t maxCUWidth, uint32_t maxCUHeight, uint32_t maxCUDepth);
+    void  create(int picWidth, int picHeight, int csp, uint32_t maxCUWidth, uint32_t maxCUHeight, uint32_t maxCUDepth);
     void  destroy();
 
     void  createLuma(int picWidth, int picHeight, uint32_t maxCUWidth, uint32_t maxCUHeight, uint32_t maxCUDepth);
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibCommon/TComPrediction.cpp
--- a/source/Lib/TLibCommon/TComPrediction.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibCommon/TComPrediction.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -98,7 +98,8 @@
     }
 }
 
-void TComPrediction::initTempBuff()
+
+void TComPrediction::initTempBuff(int csp)
 {
     if (m_predBuf == NULL)
     {
@@ -107,10 +108,11 @@
         int i, j;
         for (i = 0; i < 4; i++)
         {
-            m_filteredBlockTmp[i].create(extWidth, extHeight + 7);
+            m_filteredBlockTmp[i].create(extWidth, extHeight + 7, csp);
+
             for (j = 0; j < 4; j++)
             {
-                m_filteredBlock[i][j].create(extWidth, extHeight);
+                m_filteredBlock[i][j].create(extWidth, extHeight, csp);
             }
         }
 
@@ -124,13 +126,12 @@
         refLeft = (Pel*)X265_MALLOC(Pel, 3 * MAX_CU_SIZE);
         refLeftFlt = (Pel*)X265_MALLOC(Pel, 3 * MAX_CU_SIZE);
 
-        // new structure
-        m_predYuv[0].create(MAX_CU_SIZE, MAX_CU_SIZE);
-        m_predYuv[1].create(MAX_CU_SIZE, MAX_CU_SIZE);
-        m_predShortYuv[0].create(MAX_CU_SIZE, MAX_CU_SIZE);
-        m_predShortYuv[1].create(MAX_CU_SIZE, MAX_CU_SIZE);
+        m_predYuv[0].create(MAX_CU_SIZE, MAX_CU_SIZE, csp);
+        m_predYuv[1].create(MAX_CU_SIZE, MAX_CU_SIZE, csp);
+        m_predShortYuv[0].create(MAX_CU_SIZE, MAX_CU_SIZE, csp);
+        m_predShortYuv[1].create(MAX_CU_SIZE, MAX_CU_SIZE, csp);
+        m_predTempYuv.create(MAX_CU_SIZE, MAX_CU_SIZE, csp);
 
-        m_predTempYuv.create(MAX_CU_SIZE, MAX_CU_SIZE);
         m_immedVals = (int16_t*)X265_MALLOC(int16_t, 64 * (64 + NTAPS_LUMA - 1));
     }
 
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibCommon/TComPrediction.h
--- a/source/Lib/TLibCommon/TComPrediction.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibCommon/TComPrediction.h	Tue Nov 05 14:53:10 2013 -0600
@@ -105,7 +105,7 @@
     TComPrediction();
     virtual ~TComPrediction();
 
-    void initTempBuff();
+    void initTempBuff(int csp);
 
     // inter
     void motionCompensation(TComDataCU* cu, TComYuv* predYuv, int picList = REF_PIC_LIST_X, int partIdx = -1, bool bLuma = true, bool bChroma = true);
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibCommon/TComYuv.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -62,18 +62,23 @@
 TComYuv::~TComYuv()
 {}
 
-void TComYuv::create(uint32_t width, uint32_t height)
+void TComYuv::create(uint32_t width, uint32_t height, int csp)
 {
+    m_hChromaShift = CHROMA_H_SHIFT(csp);
+    m_vChromaShift = CHROMA_V_SHIFT(csp);
+
     // memory allocation
     m_bufY = (Pel*)X265_MALLOC(Pel, width * height);
-    m_bufU = (Pel*)X265_MALLOC(Pel, width * height >> 2);
-    m_bufV = (Pel*)X265_MALLOC(Pel, width * height >> 2);
+    m_bufU = (Pel*)X265_MALLOC(Pel, (width >> m_hChromaShift) * (height >> m_vChromaShift));
+    m_bufV = (Pel*)X265_MALLOC(Pel, (width >> m_hChromaShift) * (height >> m_vChromaShift));
 
     // set width and height
     m_width   = width;
     m_height  = height;
-    m_cwidth  = width  >> 1;
-    m_cheight = height >> 1;
+
+    m_cwidth  = width  >> m_hChromaShift;
+    m_cheight = height >> m_vChromaShift;
+
 }
 
 void TComYuv::destroy()
@@ -239,7 +244,7 @@
     if (bLuma)
         copyPartToPartLuma(dstPicYuv, partIdx, width, height);
     if (bChroma)
-        copyPartToPartChroma(dstPicYuv, partIdx, width >> 1, height >> 1);
+        copyPartToPartChroma(dstPicYuv, partIdx, width >> m_hChromaShift, height >> m_vChromaShift);
 }
 
 void TComYuv::copyPartToPartYuv(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
@@ -247,7 +252,7 @@
     if (bLuma)
         copyPartToPartLuma(dstPicYuv, partIdx, width, height);
     if (bChroma)
-        copyPartToPartChroma(dstPicYuv, partIdx, width >> 1, height >> 1);
+        copyPartToPartChroma(dstPicYuv, partIdx, width >> m_hChromaShift, height >> m_vChromaShift);
 }
 
 void TComYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height)
@@ -378,13 +383,13 @@
 void TComYuv::addClip(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t trUnitIdx, uint32_t partSize)
 {
     addClipLuma(srcYuv0, srcYuv1, trUnitIdx, partSize);
-    addClipChroma(srcYuv0, srcYuv1, trUnitIdx, partSize >> 1);
+    addClipChroma(srcYuv0, srcYuv1, trUnitIdx, partSize >> m_hChromaShift);
 }
 
 void TComYuv::addClip(TComYuv* srcYuv0, TShortYUV* srcYuv1, uint32_t trUnitIdx, uint32_t partSize)
 {
     addClipLuma(srcYuv0, srcYuv1, trUnitIdx, partSize);
-    addClipChroma(srcYuv0, srcYuv1, trUnitIdx, partSize >> 1);
+    addClipChroma(srcYuv0, srcYuv1, trUnitIdx, partSize >> m_hChromaShift);
 }
 
 void TComYuv::addClipLuma(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t trUnitIdx, uint32_t partSize)
@@ -587,8 +592,8 @@
         src1Stride = srcYuv1->getCStride();
         dststride  = getCStride();
 
-        width  >>= 1;
-        height >>= 1;
+        width  >>= m_hChromaShift;
+        height >>= m_vChromaShift;
 
         for (y = height - 1; y >= 0; y--)
         {
@@ -663,8 +668,8 @@
         src1Stride = srcYuv1->m_cwidth;
         dststride  = getCStride();
 
-        width  >>= 1;
-        height >>= 1;
+        width  >>= m_hChromaShift;
+        height >>= m_vChromaShift;
 
         for (y = height - 1; y >= 0; y--)
         {
@@ -692,7 +697,7 @@
 #define DISABLING_CLIP_FOR_BIPREDME 0  // x265 disables this flag so 8bpp and 16bpp outputs match
                                        // the intent is for all HM bipred to be replaced with x264 logic
 
-void TComYuv::removeHighFreq(TComYuv* srcYuv, uint32_t partIdx, uint32_t widht, uint32_t height)
+void TComYuv::removeHighFreq(TComYuv* srcYuv, uint32_t partIdx, uint32_t width, uint32_t height)
 {
     int x, y;
 
@@ -709,7 +714,7 @@
 
     for (y = height - 1; y >= 0; y--)
     {
-        for (x = widht - 1; x >= 0; x--)
+        for (x = width - 1; x >= 0; x--)
         {
 #if DISABLING_CLIP_FOR_BIPREDME
             dst[x] = (dst[x] << 1) - src[x];
@@ -725,12 +730,12 @@
     srcstride = srcYuv->getCStride();
     dststride = getCStride();
 
-    height >>= 1;
-    widht  >>= 1;
+    width  >>= m_hChromaShift;
+    height >>= m_vChromaShift;
 
     for (y = height - 1; y >= 0; y--)
     {
-        for (x = widht - 1; x >= 0; x--)
+        for (x = width - 1; x >= 0; x--)
         {
 #if DISABLING_CLIP_FOR_BIPREDME
             dstU[x] = (dstU[x] << 1) - srcU[x];
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibCommon/TComYuv.h
--- a/source/Lib/TLibCommon/TComYuv.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibCommon/TComYuv.h	Tue Nov 05 14:53:10 2013 -0600
@@ -78,6 +78,9 @@
     uint32_t m_cwidth;
     uint32_t m_cheight;
 
+    int m_hChromaShift;
+    int m_vChromaShift;
+
     static int getAddrOffset(uint32_t partUnitIdx, uint32_t width)
     {
         int blkX = g_rasterToPelX[g_zscanToRaster[partUnitIdx]];
@@ -103,9 +106,9 @@
     //  Memory management
     // ------------------------------------------------------------------------------------------------------------------
 
-    void    create(uint32_t width, uint32_t height);              ///< Create  YUV buffer
-    void    destroy();                                      ///< Destroy YUV buffer
-    void    clear();                                        ///< clear   YUV buffer
+    void    create(uint32_t width, uint32_t height, int csp); ///< Create  YUV buffer
+    void    destroy();                                        ///< Destroy YUV buffer
+    void    clear();                                          ///< clear   YUV buffer
 
     // ------------------------------------------------------------------------------------------------------------------
     //  Copy, load, store YUV buffer
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibCommon/TypeDef.h
--- a/source/Lib/TLibCommon/TypeDef.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibCommon/TypeDef.h	Tue Nov 05 14:53:10 2013 -0600
@@ -100,6 +100,9 @@
     CHROMA_444  = 3
 };
 
+#define CHROMA_H_SHIFT(x) ((x) == CHROMA_420 || (x) == CHROMA_422) // 1 when chroma width is halved (4:2:0, 4:2:2), else 0
+#define CHROMA_V_SHIFT(x) ((x) == CHROMA_420)                      // 1 when chroma height is halved (4:2:0 only), else 0
+
 /// supported partition shape
 enum PartSize
 {
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibEncoder/TEncCfg.h
--- a/source/Lib/TLibEncoder/TEncCfg.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibEncoder/TEncCfg.h	Tue Nov 05 14:53:10 2013 -0600
@@ -113,6 +113,8 @@
     int       m_gradualDecodingRefreshInfoEnabled;
     int       m_decodingUnitInfoSEIEnabled;
 
+    int       m_csp;
+
     //====== Weighted Prediction ========
 
     uint32_t  m_log2ParallelMergeLevelMinus2;                 ///< Parallel merge estimation region
@@ -252,6 +254,8 @@
 
     int getVideoFormat() { return m_videoFormat; }
 
+    int getColorFormat() { return m_csp; }
+
     bool getVideoFullRangeFlag() { return m_videoFullRangeFlag; }
 
     bool getColourDescriptionPresentFlag() { return m_colourDescriptionPresentFlag; }
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibEncoder/TEncCu.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -103,6 +103,8 @@
 
     m_origYuv = new TComYuv*[m_totalDepth - 1];
 
+    int csp = m_cfg->getColorFormat();
+
     for (int i = 0; i < m_totalDepth - 1; i++)
     {
         uint32_t numPartitions = 1 << ((m_totalDepth - i - 1) << 1);
@@ -110,49 +112,58 @@
         uint32_t height = maxWidth >> i;
 
         m_bestCU[i] = new TComDataCU;
-        m_bestCU[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1));
+        m_bestCU[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1), csp);
+
         m_tempCU[i] = new TComDataCU;
-        m_tempCU[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1));
+        m_tempCU[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1), csp);
 
         m_interCU_2Nx2N[i] = new TComDataCU;
-        m_interCU_2Nx2N[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1));
+        m_interCU_2Nx2N[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1), csp);
+
         m_interCU_2NxN[i] = new TComDataCU;
-        m_interCU_2NxN[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1));
+        m_interCU_2NxN[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1), csp);
+
         m_interCU_Nx2N[i] = new TComDataCU;
-        m_interCU_Nx2N[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1));
+        m_interCU_Nx2N[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1), csp);
+
         m_intraInInterCU[i] = new TComDataCU;
-        m_intraInInterCU[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1));
+        m_intraInInterCU[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1), csp);
+
         m_mergeCU[i] = new TComDataCU;
-        m_mergeCU[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1));
+        m_mergeCU[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1), csp);
+
         m_bestMergeCU[i] = new TComDataCU;
-        m_bestMergeCU[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1));
+        m_bestMergeCU[i]->create(numPartitions, width, height, maxWidth >> (m_totalDepth - 1), csp);
+
         m_bestPredYuv[i] = new TComYuv;
-        m_bestPredYuv[i]->create(width, height);
+        m_bestPredYuv[i]->create(width, height, csp);
+
         m_bestResiYuv[i] = new TShortYUV;
-        m_bestResiYuv[i]->create(width, height);
+        m_bestResiYuv[i]->create(width, height, csp);
+
         m_bestRecoYuv[i] = new TComYuv;
-        m_bestRecoYuv[i]->create(width, height);
+        m_bestRecoYuv[i]->create(width, height, csp);
 
         m_tmpPredYuv[i] = new TComYuv;
-        m_tmpPredYuv[i]->create(width, height);
+        m_tmpPredYuv[i]->create(width, height, csp);
 
         for (int j = 0; j < MAX_PRED_TYPES; j++)
         {
             m_modePredYuv[j][i] = new TComYuv;
-            m_modePredYuv[j][i]->create(width, height);
+            m_modePredYuv[j][i]->create(width, height, csp);
         }
 
         m_tmpResiYuv[i] = new TShortYUV;
-        m_tmpResiYuv[i]->create(width, height);
+        m_tmpResiYuv[i]->create(width, height, csp);
 
         m_tmpRecoYuv[i] = new TComYuv;
-        m_tmpRecoYuv[i]->create(width, height);
+        m_tmpRecoYuv[i]->create(width, height, csp);
 
         m_bestMergeRecoYuv[i] = new TComYuv;
-        m_bestMergeRecoYuv[i]->create(width, height);
+        m_bestMergeRecoYuv[i]->create(width, height, csp);
 
         m_origYuv[i] = new TComYuv;
-        m_origYuv[i]->create(width, height);
+        m_origYuv[i]->create(width, height, csp);
     }
 
     m_bEncodeDQP = false;
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -151,7 +151,7 @@
         }
     }
 
-    initTempBuff();
+    initTempBuff(cfg->getColorFormat());
 
     m_tempPel = new Pel[g_maxCUWidth * g_maxCUHeight];
 
@@ -166,12 +166,17 @@
     m_qtTempCbf[1]  = new UChar[numPartitions];
     m_qtTempCbf[2]  = new UChar[numPartitions];
     m_qtTempTComYuv  = new TShortYUV[numLayersToAllocate];
+
+    m_hChromaShift = CHROMA_H_SHIFT(cfg->getColorFormat());
+    m_vChromaShift = CHROMA_V_SHIFT(cfg->getColorFormat());
+
     for (uint32_t i = 0; i < numLayersToAllocate; ++i)
     {
         m_qtTempCoeffY[i]  = new TCoeff[g_maxCUWidth * g_maxCUHeight];
-        m_qtTempCoeffCb[i] = new TCoeff[g_maxCUWidth * g_maxCUHeight >> 2];
-        m_qtTempCoeffCr[i] = new TCoeff[g_maxCUWidth * g_maxCUHeight >> 2];
-        m_qtTempTComYuv[i].create(g_maxCUWidth, g_maxCUHeight);
+
+        m_qtTempCoeffCb[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift)];
+        m_qtTempCoeffCr[i] = new TCoeff[(g_maxCUWidth >> m_hChromaShift) * (g_maxCUHeight >> m_vChromaShift)];
+        m_qtTempTComYuv[i].create(g_maxCUWidth, g_maxCUHeight, cfg->getColorFormat());
     }
 
     m_sharedPredTransformSkip[0] = new Pel[MAX_TS_WIDTH * MAX_TS_HEIGHT];
@@ -180,12 +185,13 @@
     m_qtTempTUCoeffY  = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];
     m_qtTempTUCoeffCb = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];
     m_qtTempTUCoeffCr = new TCoeff[MAX_TS_WIDTH * MAX_TS_HEIGHT];
-    m_qtTempTransformSkipTComYuv.create(g_maxCUWidth, g_maxCUHeight);
+
+    m_qtTempTransformSkipTComYuv.create(g_maxCUWidth, g_maxCUHeight, cfg->getColorFormat());
 
     m_qtTempTransformSkipFlag[0] = new UChar[numPartitions];
     m_qtTempTransformSkipFlag[1] = new UChar[numPartitions];
     m_qtTempTransformSkipFlag[2] = new UChar[numPartitions];
-    m_tmpYuvPred.create(MAX_CU_SIZE, MAX_CU_SIZE);
+    m_tmpYuvPred.create(MAX_CU_SIZE, MAX_CU_SIZE, cfg->getColorFormat());
 }
 
 void TEncSearch::setQPLambda(int QP, double lambdaLuma, double lambdaChroma)
@@ -530,8 +536,8 @@
 
     TextType ttype          = (chromaId > 0 ? TEXT_CHROMA_V : TEXT_CHROMA_U);
     uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdx);
-    uint32_t width          = cu->getWidth(0) >> (trDepth + 1);
-    uint32_t height         = cu->getHeight(0) >> (trDepth + 1);
+    uint32_t width          = cu->getWidth(0) >> (trDepth + m_hChromaShift);
+    uint32_t height         = cu->getHeight(0) >> (trDepth + m_vChromaShift);
     uint32_t stride         = fencYuv->getCStride();
     Pel*     fenc           = (chromaId > 0 ? fencYuv->getCrAddr(absPartIdx) : fencYuv->getCbAddr(absPartIdx));
     Pel*     pred           = (chromaId > 0 ? predYuv->getCrAddr(absPartIdx) : predYuv->getCbAddr(absPartIdx));
diff -r 695e69ec99db -r dace992d1d66 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/Lib/TLibEncoder/TEncSearch.h	Tue Nov 05 14:53:10 2013 -0600
@@ -107,7 +107,11 @@
     Pel*            m_tempPel;    // avoid mallocs in xEstimateResidualQT
 
     // AMVP cost of a given mvp index for a given mvp candidate count
-    uint32_t            m_mvpIdxCost[AMVP_MAX_NUM_CANDS + 1][AMVP_MAX_NUM_CANDS + 1];
+    uint32_t        m_mvpIdxCost[AMVP_MAX_NUM_CANDS + 1][AMVP_MAX_NUM_CANDS + 1];
+
+    // Color space parameters
+    int             m_hChromaShift;
+    int             m_vChromaShift;
 
 public:
 
diff -r 695e69ec99db -r dace992d1d66 source/common/TShortYUV.cpp
--- a/source/common/TShortYUV.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/common/TShortYUV.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -43,17 +43,21 @@
 TShortYUV::~TShortYUV()
 {}
 
-void TShortYUV::create(unsigned int width, unsigned int height)
+void TShortYUV::create(unsigned int width, unsigned int height, int csp)
 {
+    m_hChromaShift = CHROMA_H_SHIFT(csp);
+    m_vChromaShift = CHROMA_V_SHIFT(csp);
+
     m_bufY  = (int16_t*)X265_MALLOC(int16_t, width * height);
-    m_bufCb = (int16_t*)X265_MALLOC(int16_t, width * height >> 2);
-    m_bufCr = (int16_t*)X265_MALLOC(int16_t, width * height >> 2);
+    m_bufCb = (int16_t*)X265_MALLOC(int16_t, (width >> m_hChromaShift) * (height >> m_vChromaShift));
+    m_bufCr = (int16_t*)X265_MALLOC(int16_t, (width >> m_hChromaShift) * (height >> m_vChromaShift));
 
     // set width and height
-    m_width   = width;
-    m_height  = height;
-    m_cwidth  = width  >> 1;
-    m_cheight = height >> 1;
+    m_width  = width;
+    m_height = height;
+
+    m_cwidth  = width  >> m_hChromaShift;
+    m_cheight = height >> m_vChromaShift;
 }
 
 void TShortYUV::destroy()
@@ -75,8 +79,8 @@
 
 void TShortYUV::subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int trUnitIdx, unsigned int partSize)
 {
-    subtractLuma(srcYuv0, srcYuv1,  trUnitIdx, partSize);
-    subtractChroma(srcYuv0, srcYuv1,  trUnitIdx, partSize >> 1);
+    subtractLuma(srcYuv0, srcYuv1, trUnitIdx, partSize);
+    subtractChroma(srcYuv0, srcYuv1, trUnitIdx, partSize >> m_hChromaShift);
 }
 
 void TShortYUV::subtractLuma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int trUnitIdx, unsigned int partSize)
@@ -116,7 +120,7 @@
 void TShortYUV::addClip(TShortYUV* srcYuv0, TShortYUV* srcYuv1, unsigned int trUnitIdx, unsigned int partSize)
 {
     addClipLuma(srcYuv0, srcYuv1, trUnitIdx, partSize);
-    addClipChroma(srcYuv0, srcYuv1, trUnitIdx, partSize >> 1);
+    addClipChroma(srcYuv0, srcYuv1, trUnitIdx, partSize >> m_hChromaShift);
 }
 
 #if _MSC_VER
@@ -160,13 +164,13 @@
 void TShortYUV::copyPartToPartYuv(TShortYUV* dstPicYuv, unsigned int partIdx, unsigned int width, unsigned int height)
 {
     copyPartToPartLuma(dstPicYuv, partIdx, width, height);
-    copyPartToPartChroma(dstPicYuv, partIdx, width >> 1, height >> 1);
+    copyPartToPartChroma(dstPicYuv, partIdx, width >> m_hChromaShift, height >> m_vChromaShift);
 }
 
 void TShortYUV::copyPartToPartYuv(TComYuv* dstPicYuv, unsigned int partIdx, unsigned int width, unsigned int height)
 {
     copyPartToPartLuma(dstPicYuv, partIdx, width, height);
-    copyPartToPartChroma(dstPicYuv, partIdx, width >> 1, height >> 1);
+    copyPartToPartChroma(dstPicYuv, partIdx, width >> m_hChromaShift, height >> m_vChromaShift);
 }
 
 void TShortYUV::copyPartToPartLuma(TShortYUV* dstPicYuv, unsigned int partIdx, unsigned int width, unsigned int height)
diff -r 695e69ec99db -r dace992d1d66 source/common/TShortYUV.h
--- a/source/common/TShortYUV.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/common/TShortYUV.h	Tue Nov 05 14:53:10 2013 -0600
@@ -64,10 +64,14 @@
     unsigned int m_cwidth;
     unsigned int m_cheight;
 
+    int m_hChromaShift;
+    int m_vChromaShift;
+
     TShortYUV();
     virtual ~TShortYUV();
 
-    void create(unsigned int width, unsigned int height);
+    void create(unsigned int width, unsigned int height, int csp);
+
     void destroy();
     void clear();
 
diff -r 695e69ec99db -r dace992d1d66 source/common/common.cpp
--- a/source/common/common.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/common/common.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -819,3 +819,31 @@
 
     return buf;
 }
+
+const char * const source_csp_names[] =
+{
+    "i420",
+    "i422",
+    "i444",
+    0
+};
+
+const uint8_t source_csp_fix[] = {
+    X265_CSP_I420,
+    X265_CSP_I422,
+    X265_CSP_I444
+};
+
+void parseCspName(const char *arg, int *dst)
+{
+    int csp = 0;
+    for (int i = 0; source_csp_names[i]; i++)
+    {
+        if (!strcmp(arg, source_csp_names[i]))
+        {
+            csp = i;
+        }
+    }
+
+    *dst = source_csp_fix[csp];
+}
diff -r 695e69ec99db -r dace992d1d66 source/common/common.h
--- a/source/common/common.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/common/common.h	Tue Nov 05 14:53:10 2013 -0600
@@ -117,5 +117,6 @@
 int x265_set_globals(x265_param *param);
 int x265_exp2fix8(double x);
 char *x265_param2string(x265_param *p);
+void parseCspName(const char *arg, int *dst);
 
 #endif // ifndef X265_COMMON_H
diff -r 695e69ec99db -r dace992d1d66 source/encoder/cturow.cpp
--- a/source/encoder/cturow.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/encoder/cturow.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -56,8 +56,8 @@
     m_search.setEntropyCoder(&m_entropyCoder);
     m_search.setRDGoOnSbacCoder(&m_rdGoOnSbacCoder);
 
+    m_cuCoder.init(top);
     m_cuCoder.create((UChar)g_maxCUDepth, g_maxCUWidth);
-    m_cuCoder.init(top);
     m_cuCoder.setRdCost(&m_rdCost);
     m_cuCoder.setRDSbacCoder(m_rdSbacCoders);
     m_cuCoder.setEntropyCoder(&m_entropyCoder);
diff -r 695e69ec99db -r dace992d1d66 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/encoder/encoder.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -1080,6 +1080,12 @@
 
     //====== Coding Tools ========
 
+    if (_param->sourceCsp != X265_CSP_NONE)
+    {
+        m_csp = _param->sourceCsp >= X265_CSP_I444 ? CHROMA_444 :
+                _param->sourceCsp >= X265_CSP_I422 ? CHROMA_422 : CHROMA_420;
+    }
+
     uint32_t tuQTMaxLog2Size = g_convertToBit[_param->maxCUSize] + 2 - 1;
     m_quadtreeTULog2MaxSize = tuQTMaxLog2Size;
     uint32_t tuQTMinLog2Size = 2; //log2(4)
diff -r 695e69ec99db -r dace992d1d66 source/input/input.h
--- a/source/input/input.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/input/input.h	Tue Nov 05 14:53:10 2013 -0600
@@ -53,12 +53,16 @@
 
     virtual void setBitDepth(uint32_t bitDepth) = 0;
 
+    virtual void setColorSpace(int csp) = 0;
+
     virtual float getRate() const = 0;
 
     virtual int getWidth() const = 0;
 
     virtual int getHeight() const = 0;
 
+    virtual int getColorSpace() const = 0;
+
     virtual void startReader() = 0;
 
     virtual void release() = 0;
diff -r 695e69ec99db -r dace992d1d66 source/input/y4m.cpp
--- a/source/input/y4m.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/input/y4m.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -41,12 +41,11 @@
 
 Y4MInput::Y4MInput(const char *filename, uint32_t /*inputBitDepth*/)
 {
-
 #if defined ENABLE_THREAD
     for (uint32_t i = 0; i < QUEUE_SIZE; i++)
-        buf[i] = NULL;
+        plane[i][2] = plane[i][1] = plane[i][0] = NULL;
 #else
-    buf = NULL;
+    plane[0][2] = plane[0][1] = plane[0][0] = NULL;
 #endif
 
     ifs = NULL;
@@ -71,16 +70,11 @@
             tail = 0;
             for (uint32_t i = 0; i < QUEUE_SIZE; i++)
             {
-                buf[i] = new char[3 * width * height / 2];
-                if (buf[i] == NULL)
-                {
-                    x265_log(NULL, X265_LOG_ERROR, "y4m: buffer allocation failure, aborting\n");
-                    threadActive = false;
-                }
+                pictureAlloc(i);
             }
-#else // if defined(ENABLE_THREAD)
-            buf = new char[3 * width * height / 2];
-#endif // if defined(ENABLE_THREAD)
+#else 
+            pictureAlloc(0);
+#endif
         }
     }
     if (!threadActive && ifs && ifs != &cin)
@@ -97,12 +91,30 @@
 #if defined(ENABLE_THREAD)
     for (uint32_t i = 0; i < QUEUE_SIZE; i++)
     {
-        delete[] buf[i];
+        for(int j = 0; j < x265_cli_csps[colorSpace].planes; j++)
+            delete[] plane[i][j];
     }
+#else
+    for(int i = 0; i < x265_cli_csps[colorSpace].planes; i++)
+        delete[] plane[0][i];
+#endif
+}
 
-#else
-    delete[] buf;
-#endif
+void Y4MInput::pictureAlloc(int queueindex) // allocates one buffer per colour plane for queue slot `queueindex`
+{
+    for (int i = 0; i < x265_cli_csps[colorSpace].planes; i++)
+    {
+        plane_size[i] = (uint32_t)((width >> x265_cli_csps[colorSpace].width[i]) * (height >> x265_cli_csps[colorSpace].height[i]));
+        plane[queueindex][i] = new char[plane_size[i]];
+        plane_stride[i] = (uint32_t)( width >> x265_cli_csps[colorSpace].width[i]);
+
+        if (plane[queueindex][i] == NULL) // NOTE(review): plain new[] throws instead of returning NULL, so this branch is presumably unreachable - confirm
+        {
+            x265_log(NULL, X265_LOG_ERROR, "y4m: buffer allocation failure, aborting\n");
+            threadActive = false;
+            return;
+        }
+    }
 }
 
 bool Y4MInput::parseHeader()
@@ -114,6 +126,8 @@
     height = 0;
     rateNum = 0;
     rateDenom = 0;
+    colorSpace = X265_CSP_I420;
+    int csp = 0;
 
     while (!ifs->eof())
     {
@@ -212,6 +226,24 @@
 
                 break;
 
+            case 'C':
+                while (!ifs->eof())
+                {
+                    c = ifs->get();
+
+                    if (c == ' ' || c == '\n')
+                    {
+                        break;
+                    }
+                    else
+                    {
+                        csp = csp * 10 + (c - '0');
+                    }
+                }
+
+                colorSpace = (csp == 444) ? X265_CSP_I444 : (csp == 422) ? X265_CSP_I422 : X265_CSP_I420;
+                break;
+
             default:
                 while (!ifs->eof())
                 {
@@ -233,7 +265,8 @@
 
     if (width < MIN_FRAME_WIDTH || width > MAX_FRAME_WIDTH ||
-        height < MIN_FRAME_HEIGHT || width > MAX_FRAME_HEIGHT ||
-        (rateNum / rateDenom) < 1 || (rateNum / rateDenom) > MAX_FRAME_RATE)
+        height < MIN_FRAME_HEIGHT || height > MAX_FRAME_HEIGHT || // upper bound must test height, not width
+        (rateNum / rateDenom) < 1 || (rateNum / rateDenom) > MAX_FRAME_RATE ||
+        colorSpace <= X265_CSP_NONE || colorSpace >= X265_CSP_MAX)
         return false;
 
     return true;
@@ -255,7 +288,13 @@
     if (size < 0)
         return -1;
 
-    return (int)((size - cur) / ((width * height * 3 / 2) + strlen(header) + 1));
+    int plane_size = 0;
+    for (int i = 0; i < x265_cli_csps[colorSpace].planes; i++)
+    {
+        plane_size += (uint32_t)((width >> x265_cli_csps[colorSpace].width[i]) * (height >> x265_cli_csps[colorSpace].height[i]));
+    }
+
+    return (int)((size - cur) / (plane_size + strlen(header) + 1));
 }
 
 void Y4MInput::skipFrames(uint32_t numFrames)
@@ -281,17 +320,12 @@
     if (!frameStat[head])
         return false;
 
-    pic.planes[0] = buf[head];
+    for (int i = 0; i < x265_cli_csps[colorSpace].planes; i++)
+    {
+        pic.planes[i] = plane[head][i];
+        pic.stride[i] = plane_stride[i]; 
+    }
 
-    pic.planes[1] = buf[head] + width * height;
-
-    pic.planes[2] = buf[head] + width * height + ((width * height) >> 2);
-
-    pic.bitDepth = 8;
-
-    pic.stride[0] = width;
-
-    pic.stride[1] = pic.stride[2] = pic.stride[0] >> 1;
     head = (head + 1) % QUEUE_SIZE;
     notFull.trigger();
 
@@ -341,7 +375,6 @@
         c = ifs->get();
     }
 
-    const size_t count = width * height * 3 / 2;
     while ((tail + 1) % QUEUE_SIZE == head)
     {
         notFull.wait();
@@ -349,7 +382,11 @@
             return false;
     }
 
-    ifs->read(buf[tail], count);
+    for (int i = 0; i < x265_cli_csps[colorSpace].planes; i++)
+    {
+        ifs->read(plane[tail][i], plane_size[i]);
+    }
+
     frameStat[tail] = !ifs->fail();
     tail = (tail + 1) % QUEUE_SIZE;
     notEmpty.trigger();
@@ -381,19 +418,12 @@
         c = ifs->get();
     }
 
-    const size_t count = width * height * 3 / 2;
-
-    pic.planes[0] = buf;
-
-    pic.planes[1] = buf + width * height;
-
-    pic.planes[2] = buf + width * height + ((width * height) >> 2);
-
-    pic.bitDepth = 8;
-
-    pic.stride[0] = width;
-
-    pic.stride[1] = pic.stride[2] = pic.stride[0] >> 1;
+    /* read every plane of the frame directly into its own buffer */
+    for (int i = 0; i < x265_cli_csps[colorSpace].planes; i++)
+    {
+        ifs->read(plane[0][i], plane_size[i]);
+        pic.planes[i] = plane[0][i];
+        pic.stride[i] = plane_stride[i];
+    }
 
-    ifs->read(buf, count);
     PPAStopCpuEventFunc(read_yuv);
diff -r 695e69ec99db -r dace992d1d66 source/input/y4m.h
--- a/source/input/y4m.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/input/y4m.h	Tue Nov 05 14:53:10 2013 -0600
@@ -51,6 +51,12 @@
 
     int height;
 
+    int colorSpace;   ///< source Color Space Parameter
+
+    uint32_t plane_size[3];
+
+    uint32_t plane_stride[3];
+
     bool threadActive;
 
 #if defined(ENABLE_THREAD)
@@ -60,13 +66,13 @@
 
     bool frameStat[QUEUE_SIZE];
 
-    char* buf[QUEUE_SIZE];
+    char* plane[QUEUE_SIZE][3];
 
     Event notFull;
 
     Event notEmpty;
 #else // if defined(ENABLE_THREAD)
-    char *buf;
+    char *plane[1][3];
 #endif // if defined(ENABLE_THREAD)
     std::istream *ifs;
 
@@ -78,19 +84,23 @@
 
     virtual ~Y4MInput();
 
-    void setDimensions(int, int)                  { /* ignore, warn */ }
+    void setDimensions(int, int)  { /* ignore, warn */ }
 
-    void setBitDepth(uint32_t)                         { /* ignore, warn */ }
+    void setBitDepth(uint32_t)    { /* ignore, warn */ }
 
-    float getRate() const                         { return ((float)rateNum) / rateDenom; }
+    void setColorSpace(int)       { /* ignore, warn */}
 
-    int getWidth() const                          { return width; }
+    float getRate() const         { return ((float)rateNum) / rateDenom; }
 
-    int getHeight() const                         { return height; }
+    int getWidth() const          { return width; }
 
-    bool isEof() const                            { return (ifs && ifs->eof()); }
+    int getHeight() const         { return height; }
 
-    bool isFail()                                 { return !(ifs && !ifs->fail() && threadActive); }
+    int getColorSpace() const     { return colorSpace; }
+
+    bool isEof() const            { return (ifs && ifs->eof()); }
+
+    bool isFail()                 { return !(ifs && !ifs->fail() && threadActive); }
 
     void startReader();
 
@@ -102,6 +112,8 @@
 
     bool readPicture(x265_picture&);
 
+    void pictureAlloc(int index);
+
 #if defined(ENABLE_THREAD)
 
     void threadMain();
@@ -110,7 +122,7 @@
 
 #endif
 
-    const char *getName() const                   { return "y4m"; }
+    const char *getName() const   { return "y4m"; }
 };
 }
 
diff -r 695e69ec99db -r dace992d1d66 source/input/yuv.cpp
--- a/source/input/yuv.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/input/yuv.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -233,7 +233,8 @@
 
     pic.bitDepth = depth;
 
-    pic.stride[0] = width * pixelbytes;
+    /* Stride is measured in pixels */
+    pic.stride[0] = width;
 
     pic.stride[1] = pic.stride[2] = pic.stride[0] >> 1;
 
diff -r 695e69ec99db -r dace992d1d66 source/input/yuv.h
--- a/source/input/yuv.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/input/yuv.h	Tue Nov 05 14:53:10 2013 -0600
@@ -47,6 +47,8 @@
 
     int height;
 
+    int colorSpace; ///< source Color Space Parameter
+
     uint32_t depth;
 
     uint32_t pixelbytes;
@@ -81,7 +83,9 @@
 
     void setDimensions(int w, int h);
 
-    void setBitDepth(uint32_t bitDepth)                { depth = bitDepth; }
+    void setColorSpace(int csp)                   { colorSpace = csp; }
+
+    void setBitDepth(uint32_t bitDepth)           { depth = bitDepth; }
 
     float getRate() const                         { return 0.0f; }
 
@@ -89,6 +93,8 @@
 
     int getHeight() const                         { return height; }
 
+    int getColorSpace() const                     { return colorSpace; }
+
     bool isEof() const                            { return (ifs && ifs->eof()); }
 
     bool isFail()                                 { return !(ifs && !ifs->fail() && threadActive); }
diff -r 695e69ec99db -r dace992d1d66 source/output/output.cpp
--- a/source/output/output.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/output/output.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -28,12 +28,12 @@
 
 using namespace x265;
 
-Output* Output::open(const char *fname, int width, int height, uint32_t bitdepth, int rate)
+Output* Output::open(const char *fname, int width, int height, uint32_t bitdepth, int rate, int csp)
 {
     const char * s = strrchr(fname, '.');
 
     if (s && !strcmp(s, ".y4m"))
-        return new Y4MOutput(fname, width, height, rate);
+        return new Y4MOutput(fname, width, height, rate, csp);
     else
-        return new YUVOutput(fname, width, height, bitdepth);
+        return new YUVOutput(fname, width, height, bitdepth, csp);
 }
diff -r 695e69ec99db -r dace992d1d66 source/output/output.h
--- a/source/output/output.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/output/output.h	Tue Nov 05 14:53:10 2013 -0600
@@ -39,7 +39,7 @@
 
     Output()           {}
 
-    static Output* open(const char *fname, int width, int height, uint32_t bitdepth, int rate);
+    static Output* open(const char *fname, int width, int height, uint32_t bitdepth, int rate, int csp);
 
     virtual bool isFail() const = 0;
 
diff -r 695e69ec99db -r dace992d1d66 source/output/y4m.cpp
--- a/source/output/y4m.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/output/y4m.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -21,6 +21,7 @@
  * For more information, contact us at licensing at multicorewareinc.com.
  *****************************************************************************/
 
+#include "common.h"
 #include "PPA/ppa.h"
 #include "common.h"
 #include "output.h"
@@ -29,17 +30,27 @@
 using namespace x265;
 using namespace std;
 
-Y4MOutput::Y4MOutput(const char *filename, int w, int h, int rate)
+Y4MOutput::Y4MOutput(const char *filename, int w, int h, int rate, int csp)
     : width(w)
     , height(h)
+    , colorSpace(csp)
+    , frameSize(0)
 {
     ofs.open(filename, ios::binary | ios::out);
     buf = new char[width];
+
+    const char *cf = (csp >= X265_CSP_I444) ? "444" : (csp >= X265_CSP_I422) ? "422" : "420"; // chroma tag written after 'C' in the stream header
+
     if (ofs)
     {
-        ofs << "YUV4MPEG2 W" << width << " H" << height << " F" << rate << ":1 Ip C420\n";
+        ofs << "YUV4MPEG2 W" << width << " H" << height << " F" << rate << ":1 Ip" << " C" << cf << "\n";
         header = ofs.tellp();
     }
+
+    for (int i = 0; i < x265_cli_csps[colorSpace].planes; i++)
+    {
+        frameSize += (uint32_t)((width >> x265_cli_csps[colorSpace].width[i]) * (height >> x265_cli_csps[colorSpace].height[i]));
+    }
 }
 
 Y4MOutput::~Y4MOutput()
@@ -51,73 +62,44 @@
 bool Y4MOutput::writePicture(const x265_picture& pic)
 {
     PPAStartCpuEventFunc(write_yuv);
-    std::ofstream::pos_type frameSize = (6 + 3 * (width * height) / 2);
-    ofs.seekp(header + frameSize * pic.poc); 
+    std::ofstream::pos_type outPicPos = header;
+    outPicPos += (std::streamoff)pic.poc * (6 + frameSize); // widen first: the byte offset can exceed 32 bits
+    ofs.seekp(outPicPos);
     ofs << "FRAME\n";
 
     if (pic.bitDepth > 8)
     {
+        // encoder gave us short pixels, downshift, then write
+        int shift = pic.bitDepth - 8;
         if (pic.poc == 0)
         {
             x265_log(NULL, X265_LOG_WARNING, "y4m: down-shifting reconstructed pixels to 8 bits\n");
         }
-        // encoder gave us short pixels, downshift, then write
-        uint16_t *Y = (uint16_t*)pic.planes[0];
-        int shift = pic.bitDepth - 8;
-        for (int i = 0; i < height; i++)
+        for (int i = 0; i < x265_cli_csps[colorSpace].planes; i++)
         {
-            for (int j = 0; j < width; j++)
+            uint16_t *src = (uint16_t*)pic.planes[i];
+            for (int h = 0; h < height >> x265_cli_csps[colorSpace].height[i]; h++)
             {
-                buf[j] = (char)(Y[j] >> shift);
+                for (int w = 0; w < width >> x265_cli_csps[colorSpace].width[i]; w++)
+                {
+                    buf[w] = (char)(src[w] >> shift);
+                }
+
+                ofs.write(buf, width >> x265_cli_csps[colorSpace].width[i]);
+                src += pic.stride[i];
             }
-
-            ofs.write(buf, width);
-            Y += pic.stride[0];
-        }
-        uint16_t *U = (uint16_t*)pic.planes[1];
-        for (int i = 0; i < height >> 1; i++)
-        {
-            for (int j = 0; j < width >> 1; j++)
-            {
-                buf[j] = (char)(U[j] >> shift);
-            }
-
-            ofs.write(buf, width >> 1);
-            U += pic.stride[1];
-        }
-        uint16_t *V = (uint16_t*)pic.planes[2];
-        for (int i = 0; i < height >> 1; i++)
-        {
-            for (int j = 0; j < width >> 1; j++)
-            {
-                buf[j] = (char)(V[j] >> shift);
-            }
-
-            ofs.write(buf, width >> 1);
-            V += pic.stride[2];
         }
     }
     else
     {
-        char *Y = (char*)pic.planes[0];
-        for (int i = 0; i < height; i++)
+        for (int i = 0; i < x265_cli_csps[colorSpace].planes; i++)
         {
-            ofs.write(Y, width);
-            Y += pic.stride[0];
-        }
-
-        char *U = (char*)pic.planes[1];
-        for (int i = 0; i < height >> 1; i++)
-        {
-            ofs.write(U, width >> 1);
-            U += pic.stride[1];
-        }
-
-        char *V = (char*)pic.planes[2];
-        for (int i = 0; i < height >> 1; i++)
-        {
-            ofs.write(V, width >> 1);
-            V += pic.stride[2];
+            char *src = (char*)pic.planes[i];
+            for (int h = 0; h < height >> x265_cli_csps[colorSpace].height[i]; h++)
+            {
+                ofs.write(src, width >> x265_cli_csps[colorSpace].width[i]);
+                src += pic.stride[i];
+            }
         }
     }
 
diff -r 695e69ec99db -r dace992d1d66 source/output/y4m.h
--- a/source/output/y4m.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/output/y4m.h	Tue Nov 05 14:53:10 2013 -0600
@@ -38,6 +38,10 @@
 
     int height;
 
+    int colorSpace;
+
+    uint32_t frameSize;
+
     std::ofstream ofs;
 
     std::ofstream::pos_type header;
@@ -48,7 +52,7 @@
 
 public:
 
-    Y4MOutput(const char *filename, int width, int height, int rate);
+    Y4MOutput(const char *filename, int width, int height, int rate, int csp);
 
     virtual ~Y4MOutput();
 
diff -r 695e69ec99db -r dace992d1d66 source/output/yuv.cpp
--- a/source/output/yuv.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/output/yuv.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -21,6 +21,7 @@
  * For more information, contact us at licensing at multicorewareinc.com.
  *****************************************************************************/
 
+#include "common.h"
 #include "PPA/ppa.h"
 #include "output.h"
 #include "yuv.h"
@@ -28,13 +29,20 @@
 using namespace x265;
 using namespace std;
 
-YUVOutput::YUVOutput(const char *filename, int w, int h, uint32_t d)
+YUVOutput::YUVOutput(const char *filename, int w, int h, uint32_t d, int csp)
     : width(w)
     , height(h)
     , depth(d)
+    , colorSpace(csp)
+    , frameSize(0)
 {
     ofs.open(filename, ios::binary | ios::out);
     buf = new char[width];
+
+    for (int i = 0; i < x265_cli_csps[colorSpace].planes; i++)
+    {
+        frameSize += (uint32_t)((width >> x265_cli_csps[colorSpace].width[i]) * (height >> x265_cli_csps[colorSpace].height[i]));
+    }
 }
 
 YUVOutput::~YUVOutput()
@@ -47,68 +55,30 @@
 {
     PPAStartCpuEventFunc(write_yuv);
     uint32_t pixelbytes = (depth > 8) ? 2 : 1;
-    std::ofstream::pos_type size = 3 * (width * height * pixelbytes) / 2;
-    ofs.seekp(size * pic.poc);
+    ofs.seekp((std::streamoff)pic.poc * frameSize * pixelbytes); // widen first: the byte offset can exceed 32 bits
 
-    if (pic.bitDepth > 8 && depth == 8)
+    if (pic.bitDepth > 8)
     {
-        // encoder gave us short pixels, downscale, then write
-        uint16_t *Y = (uint16_t*)pic.planes[0];
-        for (int i = 0; i < height; i++)
+        for (int i = 0; i < x265_cli_csps[colorSpace].planes; i++)
         {
-            for (int j = 0; j < width; j++)
+            uint16_t *src = (uint16_t*)pic.planes[i];
+            for (int h = 0; h < height >> x265_cli_csps[colorSpace].height[i]; h++)
             {
-                buf[j] = (char)Y[j];
+                ofs.write((const char*) src, (width * pixelbytes) >> x265_cli_csps[colorSpace].width[i]);
+                src += pic.stride[i];
             }
-
-            ofs.write(buf, width);
-            Y += pic.stride[0];
-        }
-        uint16_t *U = (uint16_t*)pic.planes[1];
-        for (int i = 0; i < height >> 1; i++)
-        {
-            for (int j = 0; j < width >> 1; j++)
-            {
-                buf[j] = (char)U[j];
-            }
-
-            ofs.write(buf, width >> 1);
-            U += pic.stride[1];
-        }
-        uint16_t *V = (uint16_t*)pic.planes[2];
-        for (int i = 0; i < height >> 1; i++)
-        {
-            for (int j = 0; j < width >> 1; j++)
-            {
-                buf[j] = (char)V[j];
-            }
-
-            ofs.write(buf, width >> 1);
-            V += pic.stride[2];
         }
     }
     else
     {
-        // encoder pixels same size as output pixels, write them directly
-        char *Y = (char*)pic.planes[0];
-        for (int i = 0; i < height; i++)
+        for (int i = 0; i < x265_cli_csps[colorSpace].planes; i++)
         {
-            ofs.write(Y, width * pixelbytes);
-            Y += pic.stride[0] * pixelbytes;
-        }
-
-        char *U = (char*)pic.planes[1];
-        for (int i = 0; i < height >> 1; i++)
-        {
-            ofs.write(U, (width >> 1) * pixelbytes);
-            U += pic.stride[1] * pixelbytes;
-        }
-
-        char *V = (char*)pic.planes[2];
-        for (int i = 0; i < height >> 1; i++)
-        {
-            ofs.write(V, (width >> 1) * pixelbytes);
-            V += pic.stride[2] * pixelbytes;
+            char *src = (char *)pic.planes[i];
+            for (int h = 0; h < height >> x265_cli_csps[colorSpace].height[i]; h++)
+            {
+                ofs.write(src, width >> x265_cli_csps[colorSpace].width[i]);
+                src += pic.stride[i];
+            }
         }
     }
 
diff -r 695e69ec99db -r dace992d1d66 source/output/yuv.h
--- a/source/output/yuv.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/output/yuv.h	Tue Nov 05 14:53:10 2013 -0600
@@ -41,13 +41,17 @@
 
     uint32_t depth;
 
+    int colorSpace;
+
+    uint32_t frameSize;
+
     char *buf;
 
     std::ofstream ofs;
 
 public:
 
-    YUVOutput(const char *filename, int width, int height, uint32_t bitdepth);
+    YUVOutput(const char *filename, int width, int height, uint32_t bitdepth, int csp);
 
     virtual ~YUVOutput();
 
diff -r 695e69ec99db -r dace992d1d66 source/x265.cpp
--- a/source/x265.cpp	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/x265.cpp	Tue Nov 05 14:53:10 2013 -0600
@@ -79,6 +79,7 @@
     { "input",          required_argument, NULL, 0 },
     { "input-depth",    required_argument, NULL, 0 },
     { "input-res",      required_argument, NULL, 0 },
+    { "input-csp",      required_argument, NULL, 0 },
     { "fps",            required_argument, NULL, 0 },
     { "frame-skip",     required_argument, NULL, 0 },
     { "frames",         required_argument, NULL, 'f' },
@@ -273,8 +274,9 @@
     H0("-o/--output                      Bitstream output file name\n");
     H0("\nInput Options:\n");
     H0("   --input                       Raw YUV or Y4M input file name\n");
-    H0("   --input-depth                 Bit-depth of input file (YUV only) Default %d\n", param->inputBitDepth);
+    H0("   --input-depth                 Bit-depth of input file and internal encoder bit depth. Default %d\n", param->inputBitDepth);
     H0("   --input-res                   Source picture size [w x h], auto-detected if Y4M\n");
+    H0("   --input-csp                   Source color space parameter, auto-detected if Y4M\n");
     H0("   --fps                         Source frame rate, auto-detected if Y4M\n");
     H0("   --frame-skip                  Number of frames to skip at start of input file\n");
     H0("-f/--frames                      Number of frames to be encoded. Default all\n");
@@ -326,8 +328,8 @@
     H0("   --[no-]ssim                   Enable reporting SSIM metric scores. Default %s\n", OPT(param->bEnableSsim));
     H0("   --[no-]psnr                   Enable reporting PSNR metric scores. Default %s\n", OPT(param->bEnablePsnr));
     H0("\nReconstructed video options (debugging):\n");
-    H0("-r/--recon                       Reconstructed image YUV or Y4M output file name\n");
-    H0("   --recon-depth                 Bit-depth of output file. Default %d\n", param->reconFileBitDepth);
+    H0("-r/--recon                       Reconstructed raw image YUV or Y4M output file name\n");
+    H0("   --recon-depth                 Bit-depth of reconstructed raw image file. Default 8\n");
     H0("\nSEI options:\n");
     H0("   --hash                        Decoded Picture Hash SEI 0: disabled, 1: MD5, 2: CRC, 3: Checksum. Default %d\n", param->decodedPictureHashSEI);
 #undef OPT
@@ -340,12 +342,14 @@
     int berror = 0;
     int help = 0;
     int cpuid = 0;
+    int reconFileBitDepth = 8;
     const char *inputfn = NULL;
     const char *reconfn = NULL;
     const char *bitstreamfn = NULL;
     const char *inputRes = NULL;
     const char *preset = "medium";
     const char *tune = "psnr";
+    const char *inputCsp = NULL;
 
     /* Presets are applied before all other options. */
     for (optind = 0;; )
@@ -422,8 +426,9 @@
             OPT("input") inputfn = optarg;
             OPT("recon") reconfn = optarg;
             OPT("input-depth") param->inputBitDepth = (uint32_t)atoi(optarg);
-            OPT("recon-depth") param->reconFileBitDepth = (uint32_t)atoi(optarg);
+            OPT("recon-depth") reconFileBitDepth = (uint32_t)atoi(optarg);
             OPT("input-res") inputRes = optarg;
+            OPT("input-csp") inputCsp = optarg;
             OPT("y4m") bForceY4m = true;
         else
             berror |= x265_param_parse(param, long_options[long_options_index].name, optarg);
@@ -465,12 +470,15 @@
     if (this->input->getWidth())
     {
         /* parse the width, height, frame rate from the y4m file */
+        param->sourceCsp = this->input->getColorSpace();
         param->sourceWidth = this->input->getWidth();
         param->sourceHeight = this->input->getHeight();
         param->frameRate = (int)this->input->getRate();
     }
     else if (inputRes)
     {
+        parseCspName(inputCsp, &param->sourceCsp);
+        this->input->setColorSpace(param->sourceCsp);
         sscanf(inputRes, "%dx%d", &param->sourceWidth, &param->sourceHeight);
         this->input->setDimensions(param->sourceWidth, param->sourceHeight);
         this->input->setBitDepth(param->inputBitDepth);
@@ -486,9 +494,9 @@
         this->input->setBitDepth(param->inputBitDepth);
     }
 
-    if (param->reconFileBitDepth > 0)
+    if (reconFileBitDepth > 0)
     {
-        if (param->reconFileBitDepth != param->inputBitDepth)
+        if (reconFileBitDepth != param->inputBitDepth)
         {
             x265_log(param, X265_LOG_ERROR, "Bit depth of the recon file should be the same as input bit depth\n");
             /* TODO: Support recon files with bitdepth > input bit depth??*/
@@ -497,7 +505,7 @@
     }
     else
     {
-        param->reconFileBitDepth = param->inputBitDepth;
+        reconFileBitDepth = param->inputBitDepth;
     }
 
     int guess = this->input->guessFrameCount();
@@ -515,11 +523,13 @@
     if (param->logLevel >= X265_LOG_INFO)
     {
         if (this->framesToBeEncoded == 0)
-            fprintf(stderr, "%s  [info]: %dx%d %dHz, unknown frame count\n", input->getName(),
-                    param->sourceWidth, param->sourceHeight, param->frameRate);
+            fprintf(stderr, "%s  [info]: %dx%d %dHz %s, unknown frame count\n", input->getName(),
+                    param->sourceWidth, param->sourceHeight, param->frameRate,
+                    (param->sourceCsp >= X265_CSP_I444) ? "C444" : (param->sourceCsp >= X265_CSP_I422) ? "C422" : "C420");
         else
-            fprintf(stderr, "%s  [info]: %dx%d %dHz, frames %u - %d of %d\n", input->getName(),
+            fprintf(stderr, "%s  [info]: %dx%d %dHz %s, frames %u - %d of %d\n", input->getName(),
                     param->sourceWidth, param->sourceHeight, param->frameRate,
+                    (param->sourceCsp >= X265_CSP_I444) ? "C444" : (param->sourceCsp >= X265_CSP_I422) ? "C422" : "C420",
                     this->frameSkip, this->frameSkip + this->framesToBeEncoded - 1, fileFrameCount);
     }
 
@@ -527,7 +537,7 @@
 
     if (reconfn)
     {
-        this->recon = Output::open(reconfn, param->sourceWidth, param->sourceHeight, param->reconFileBitDepth, param->frameRate);
+        this->recon = Output::open(reconfn, param->sourceWidth, param->sourceHeight, reconFileBitDepth, param->frameRate, param->sourceCsp);
         if (this->recon->isFail())
         {
             x265_log(param, X265_LOG_WARNING, "unable to write reconstruction file\n");
diff -r 695e69ec99db -r dace992d1d66 source/x265.h
--- a/source/x265.h	Tue Nov 05 14:54:13 2013 -0600
+++ b/source/x265.h	Tue Nov 05 14:53:10 2013 -0600
@@ -211,6 +211,46 @@
 #define IS_X265_TYPE_I(x) ((x) == X265_TYPE_I || (x) == X265_TYPE_IDR)
 #define IS_X265_TYPE_B(x) ((x) == X265_TYPE_B || (x) == X265_TYPE_BREF)
 
+/* Colorspace type */
+#define X265_CSP_MASK           0x00ff  /* low byte holds the csp id; VFLIP/HIGH_DEPTH are flag bits above it */
+#define X265_CSP_NONE           0x0000  /* Invalid mode     */
+#define X265_CSP_I420           0x0001  /* yuv 4:2:0 planar */
+#define X265_CSP_YV12           0x0002  /* yvu 4:2:0 planar */
+#define X265_CSP_NV12           0x0003  /* yuv 4:2:0, with one y plane and one packed u+v */
+#define X265_CSP_I422           0x0004  /* yuv 4:2:2 planar */
+#define X265_CSP_YV16           0x0005  /* yvu 4:2:2 planar */
+#define X265_CSP_NV16           0x0006  /* yuv 4:2:2, with one y plane and one packed u+v */
+#define X265_CSP_I444           0x0007  /* yuv 4:4:4 planar */
+#define X265_CSP_YV24           0x0008  /* yvu 4:4:4 planar */
+#define X265_CSP_BGR            0x0009  /* packed bgr 24bits   */
+#define X265_CSP_BGRA           0x000a  /* packed bgr 32bits   */
+#define X265_CSP_RGB            0x000b  /* packed rgb 24bits   */
+#define X265_CSP_MAX            0x000c  /* end of list */
+#define X265_CSP_VFLIP          0x1000  /* the csp is vertically flipped */
+#define X265_CSP_HIGH_DEPTH     0x2000  /* the csp has a depth of 16 bits per pixel component */
+
+typedef struct
+{
+    const char *name;
+    int planes;
+    int width[3];
+    int height[3];
+    int mod_width;
+    int mod_height;
+} x265_cli_csp_t;
+
+static const x265_cli_csp_t x265_cli_csps[] = {
+    { "none", 0, { 0, 0, 0 },   { 0, 0, 0 },   0, 0 },
+    { "i420", 3, { 0, 1, 1 },   { 0, 1, 1 },   2, 2 },
+    { "yv12", 3, { 0, 1, 1 },   { 0, 1, 1 },   2, 2 },
+    { "nv12", 2, { 0,  0 },     { 0, 1 },      2, 2 },
+    { "i422", 3, { 0, 1, 1 },   { 0,  0,  0 }, 2, 1 },
+    { "yv16", 3, { 0, 1, 1 },   { 0,  0,  0 }, 2, 1 },
+    { "nv16", 2, { 0,  0 },     { 0,  0 },     2, 1 },
+    { "i444", 3, { 0,  0,  0 }, { 0,  0,  0 }, 1, 1 },
+    { "yv24", 3, { 0,  0,  0 }, { 0,  0,  0 }, 1, 1 },
+};
+
 /* rate tolerance method */
 typedef enum
 {
@@ -252,13 +292,12 @@
     int       frameNumThreads;                 ///< number of concurrently encoded frames
     const char *csvfn;                         ///< csv log filename. logLevel >= 3 is frame logging, else one line per run
 
-    int       inputBitDepth;
-    int       reconFileBitDepth;
-
     // source specification
+    int       inputBitDepth;                   ///< source pixel bit depth (and internal encoder bit depth)
     int       frameRate;                       ///< source frame-rate in Hz
     int       sourceWidth;                     ///< source width in pixels
     int       sourceHeight;                    ///< source height in pixels
+    int       sourceCsp;                       ///< source Color Space Parameter
 
     // coding unit (CU) definition
     uint32_t  maxCUSize;                       ///< max. CU width and height in pixels



More information about the x265-devel mailing list