[x265] [PATCH 01 of 18] asm: intra_pred_ang4_2_sse2 16-bit

dtyx265 at gmail.com dtyx265 at gmail.com
Fri Apr 3 18:24:32 CEST 2015


# HG changeset patch
# User David T Yuen <dtyx265 at gmail.com>
# Date 1428072972 25200
# Node ID 77edd96a4c1bc61d0bff30c4b2efef5bb8fbe2a1
# Parent  9a5fa67583feb6ffb7668f82632f7e93e5ec9415
asm: intra_pred_ang4_2_sse2 16-bit

This is a backport of the SSE4 code and replaces the C code.

./test/TestBench --testbench intrapred | grep "intra_ang_4x4\[ 2\]"
intra_ang_4x4[ 2]	8.76x 	 142.46   	 1248.07

diff -r 9a5fa67583fe -r 77edd96a4c1b source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Apr 02 13:21:32 2015 -0500
+++ b/source/common/x86/asm-primitives.cpp	Fri Apr 03 07:56:12 2015 -0700
@@ -882,6 +882,8 @@
         p.cu[BLOCK_16x16].intra_pred[PLANAR_IDX] = x265_intra_pred_planar16_sse2;
         p.cu[BLOCK_32x32].intra_pred[PLANAR_IDX] = x265_intra_pred_planar32_sse2;
 
+        p.cu[BLOCK_4x4].intra_pred[2] = x265_intra_pred_ang4_2_sse2;
+
         p.cu[BLOCK_4x4].sse_ss = x265_pixel_ssd_ss_4x4_mmx2;
         ALL_LUMA_CU(sse_ss, pixel_ssd_ss, sse2);
 
diff -r 9a5fa67583fe -r 77edd96a4c1b source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Thu Apr 02 13:21:32 2015 -0500
+++ b/source/common/x86/intrapred16.asm	Fri Apr 03 07:56:12 2015 -0700
@@ -690,6 +690,28 @@
 %endrep
     RET
 
+;-----------------------------------------------------------------------------------------
+; void intraPredAng4(pixel* dst, intptr_t dstStride, pixel* src, int dirMode, int bFilter)
+;-----------------------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal intra_pred_ang4_2, 3,5,4            ; r0 = dst, r1 = dstStride, r2 = src
+    add         r1,            r1           ; double dstStride: 16-bit pixels, so 2 bytes each
+    lea         r4,            [r2 + 20]    ; r4 = src + 20 bytes (used unless dirMode is 34)
+    add         r2,            4            ; r2 = src + 4 bytes  (used when dirMode is 34)
+    cmp         r3m,           byte 34
+    cmovne      r2,            r4           ; dirMode != 34 -> read from src + 20 bytes
+    ; One unaligned 16-byte load feeds all four rows; each row is the window moved on by 2 bytes.
+    movu        m0,            [r2]
+    movh        [r0],          m0           ; row 0: low 8 bytes
+    psrldq      m0,            2
+    movh        [r0 + r1],     m0           ; row 1
+    psrldq      m0,            2
+    movh        [r0 + r1 * 2], m0           ; row 2
+    psrldq      m0,            2
+    lea         r1,            [r1 * 3]     ; r1 = 3 * byte stride, offset of the last row
+    movh        [r0 + r1],     m0           ; row 3
+    RET
+
 ;-----------------------------------------------------------------------------------
 ; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* above, int, int filter)
 ;-----------------------------------------------------------------------------------


More information about the x265-devel mailing list