[x265] [PATCH Review only] asm: added psy_acEnergy_pp_4x4 in sse4 for psyCost_pp

Divya Manivannan divya at multicorewareinc.com
Tue Dec 16 11:35:49 CET 2014


# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1418726099 -19800
#      Tue Dec 16 16:04:59 2014 +0530
# Node ID de6f39b44c144aa56c68d27d6ee201e7dd493755
# Parent  775ebb4694ad7931a98b796640bf646085659ea2
asm: added psy_acEnergy_pp_4x4 in sse4 for psyCost_pp

diff -r 775ebb4694ad -r de6f39b44c14 source/common/pixel.cpp
--- a/source/common/pixel.cpp	Tue Dec 16 09:40:00 2014 +0530
+++ b/source/common/pixel.cpp	Tue Dec 16 16:04:59 2014 +0530
@@ -795,8 +795,18 @@
     else
     {
         /* 4x4 is too small for sa8d */
-        int sourceEnergy = satd_4x4(source, sstride, zeroBuf, 0) - (sad<4, 4>(source, sstride, zeroBuf, 0) >> 2);
-        int reconEnergy = satd_4x4(recon, rstride, zeroBuf, 0) - (sad<4, 4>(recon, rstride, zeroBuf, 0) >> 2);
+        int sourceEnergy, reconEnergy;
+        if (!HIGH_BIT_DEPTH && primitives.psy_acEnergy_pp)    // asm is 8-bit only and may be unset (no C version is registered); HBD check goes away once HBD asm exists
+        {
+            sourceEnergy = primitives.psy_acEnergy_pp(source, sstride);
+            reconEnergy = primitives.psy_acEnergy_pp(recon, rstride);
+        }
+        else
+        {
+            // C fallback: per-block energy = satd_4x4 against zeroBuf minus (sad<4, 4> against zeroBuf >> 2)
+            sourceEnergy = satd_4x4(source, sstride, zeroBuf, 0) - (sad<4, 4>(source, sstride, zeroBuf, 0) >> 2);
+            reconEnergy = satd_4x4(recon, rstride, zeroBuf, 0) - (sad<4, 4>(recon, rstride, zeroBuf, 0) >> 2);
+        }
         return abs(sourceEnergy - reconEnergy);
     }
 }
diff -r 775ebb4694ad -r de6f39b44c14 source/common/primitives.h
--- a/source/common/primitives.h	Tue Dec 16 09:40:00 2014 +0530
+++ b/source/common/primitives.h	Tue Dec 16 16:04:59 2014 +0530
@@ -195,6 +195,7 @@
 typedef void (*planecopy_sp_t) (const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
 
 typedef void (*cutree_propagate_cost) (int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, const int32_t* invQscales, const double* fpsFactor, int len);
+typedef int (*psy_acEnergy_pp_t) (const pixel* src, intptr_t srcStride); // returns the energy term of one pixel block; psy cost is the abs difference of two such terms
 
 /* Define a structure containing function pointers to optimized encoder
  * primitives.  Each pointer can reference either an assembly routine,
@@ -213,6 +214,7 @@
     pixelcmp_t            sa8d[NUM_SQUARE_BLOCKS];         // sa8d primitives for square intra blocks
     pixelcmp_t            psy_cost_pp[NUM_SQUARE_BLOCKS];  // difference in AC energy between two blocks
     pixelcmp_ss_t         psy_cost_ss[NUM_SQUARE_BLOCKS];
+    psy_acEnergy_pp_t     psy_acEnergy_pp;
 
     dct_t                 dct[NUM_DCTS];
     idct_t                idct[NUM_IDCTS];
diff -r 775ebb4694ad -r de6f39b44c14 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Dec 16 09:40:00 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue Dec 16 16:04:59 2014 +0530
@@ -1898,6 +1898,9 @@
         p.chroma[X265_CSP_I420].filter_vpp[CHROMA_16x16] = x265_interp_4tap_vert_pp_16x16_avx2;
         p.chroma[X265_CSP_I420].filter_vpp[CHROMA_32x32] = x265_interp_4tap_vert_pp_32x32_avx2;
 #endif
+
+        p.psy_acEnergy_pp = x265_psy_acEnergy_pp_4x4_sse4;
+
     }
 #endif // if HIGH_BIT_DEPTH
 }
diff -r 775ebb4694ad -r de6f39b44c14 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Tue Dec 16 09:40:00 2014 +0530
+++ b/source/common/x86/pixel-a.asm	Tue Dec 16 16:04:59 2014 +0530
@@ -6579,3 +6579,35 @@
     mov         [r2], r3w
 .end:
     RET
+
+;---------------------------------------------------------------------------------------------------------------------
+;int psy_acEnergy_pp_4x4(const pixel* source, intptr_t sstride)   ; r0 = source, r1 = sstride, result returned in eax
+;---------------------------------------------------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal psy_acEnergy_pp_4x4, 2, 3, 6                ; 2 arguments, 3 general-purpose registers (r0-r2), 6 vector registers (m0-m5)
+    ; Load the four 4-byte rows of the block and pair them up: m0 holds rows 0/1, m2 holds rows 2/3.
+    lea             r2, [3 * r1]                    ; r2 = 3 * stride, byte offset of row 3
+    movd            m0, [r0]                        ; row 0
+    movd            m1, [r0 + r1]                   ; row 1
+    movd            m2, [r0 + r1 * 2]               ; row 2
+    movd            m3, [r0 + r2]                   ; row 3
+    shufps          m0, m1, 0                       ; m0 dwords = {row0, row0, row1, row1}
+    shufps          m2, m3, 0                       ; m2 dwords = {row2, row2, row3, row3}
+    mova            m4, [hmul_4p]                   ; signed byte multipliers, defined outside this hunk (presumably a +1/-1 pattern -- confirm)
+    pmaddubsw       m0, m4                          ; words = sums of adjacent (pixel * multiplier) products, rows 0/1
+    pmaddubsw       m2, m4                          ; same for rows 2/3
+    ; Reduce a copy of the products into m5; m0 and m2 stay intact for the transform further down.
+    paddw           m5, m0, m2                      ; m5 = m0 + m2: low half rows 0+2, high half rows 1+3
+    movhlps         m4, m5                          ; m4 low half = m5 high half (m4 is free again after pmaddubsw)
+    paddw           m5, m4                          ; low half now accumulates all four rows
+    phaddw          m5, m5                          ; add adjacent words; only the resulting word 0 reaches the return value
+    pmovzxwd        m5, m5                          ; zero-extend the low words to dwords
+    psrld           m5, 2                           ; >> 2, like the (sad >> 2) term of the C formula -- NOTE(review): assumes word 0 is the plain pixel sum, which depends on hmul_4p
+    ; Transform m0/m2 with the shared HADAMARD/HADDW macros (defined outside this hunk; m1/m3 appear to be scratch -- confirm).
+    HADAMARD 0, sumsub, 0, 2, 1, 3
+    HADAMARD 4, sumsub, 0, 2, 1, 3
+    HADAMARD 1, amax, 0, 2, 1, 3
+    HADDW m0, m2                                    ; presumably a horizontal word add leaving the total in the low dword of m0 -- confirm
+    psubd           m0, m5                          ; subtract the scaled sum computed above
+    movd            eax, m0                         ; return value = low dword of m0
+    RET
diff -r 775ebb4694ad -r de6f39b44c14 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h	Tue Dec 16 09:40:00 2014 +0530
+++ b/source/common/x86/pixel.h	Tue Dec 16 16:04:59 2014 +0530
@@ -224,4 +224,6 @@
 #undef DECL_X1
 #undef DECL_X4
 
+int x265_psy_acEnergy_pp_4x4_sse4(const pixel* pix, intptr_t stride); // SSE4 assembly routine, operates on a 4x4 block
+
 #endif // ifndef X265_I386_PIXEL_H


More information about the x265-devel mailing list