[x265] [PATCH] asm: code for intra_pred[BLOCK_16x16] mode 2 and 34

Murugan Vairavel <murugan at multicorewareinc.com>
Wed Jan 8 14:35:08 CET 2014


# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1389188029 -19800
#      Wed Jan 08 19:03:49 2014 +0530
# Node ID 49cfed20055f4c444056151cdc98fe2fbdd38e76
# Parent  c4edab8dab65b393ab9d48f7533df554f41ca4fe
asm: code for intra_pred[BLOCK_16x16] mode 2 and 34

diff -r c4edab8dab65 -r 49cfed20055f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Jan 07 18:36:17 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Jan 08 19:03:49 2014 +0530
@@ -550,6 +550,9 @@
 #define SETUP_INTRA_ANG4(mode, fno, cpu) \
     p.intra_pred[BLOCK_4x4][mode] = x265_intra_pred_ang4_ ## fno ## _ ## cpu;
 
+#define SETUP_INTRA_ANG16(mode, fno, cpu) \
+    p.intra_pred[BLOCK_16x16][mode] = x265_intra_pred_ang16_ ## fno ## _ ## cpu; // 16x16 slot [mode] <- x265_intra_pred_ang16_<fno>_<cpu>; fno may differ from mode when one routine serves several modes
+
 namespace x265 {
 // private x265 namespace
 
@@ -890,6 +893,9 @@
         SETUP_INTRA_ANG4(2, 2, ssse3);
         SETUP_INTRA_ANG4(34, 2, ssse3);
 
+        SETUP_INTRA_ANG16(2, 2, ssse3);
+        SETUP_INTRA_ANG16(34, 2, ssse3);
+
         p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
         p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
 
diff -r c4edab8dab65 -r 49cfed20055f source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h	Tue Jan 07 18:36:17 2014 +0530
+++ b/source/common/x86/intrapred.h	Wed Jan 08 19:03:49 2014 +0530
@@ -58,6 +58,25 @@
 DECL_ANG(4, 18, sse4);
 DECL_ANG(4, 26, sse4);
 
+DECL_ANG(16, 2, ssse3); // implemented by intra_pred_ang16_2 in intrapred8.asm (same changeset)
+DECL_ANG(16, 3, sse4); // NOTE(review): modes 3..26 below are only declared; no definitions appear in this changeset - presumably added by follow-up patches, TODO confirm
+DECL_ANG(16, 4, sse4);
+DECL_ANG(16, 5, sse4);
+DECL_ANG(16, 6, sse4);
+DECL_ANG(16, 7, sse4);
+DECL_ANG(16, 8, sse4);
+DECL_ANG(16, 9, sse4);
+DECL_ANG(16, 10, sse4);
+DECL_ANG(16, 11, sse4);
+DECL_ANG(16, 12, sse4);
+DECL_ANG(16, 13, sse4);
+DECL_ANG(16, 14, sse4);
+DECL_ANG(16, 15, sse4);
+DECL_ANG(16, 16, sse4);
+DECL_ANG(16, 17, sse4);
+DECL_ANG(16, 18, sse4);
+DECL_ANG(16, 26, sse4);
+
 #undef DECL_ANG
 
 void x265_all_angs_pred_4x4_sse4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma);
diff -r c4edab8dab65 -r 49cfed20055f source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm	Tue Jan 07 18:36:17 2014 +0530
+++ b/source/common/x86/intrapred8.asm	Wed Jan 08 19:03:49 2014 +0530
@@ -1107,6 +1107,86 @@
     RET
 
 ;-----------------------------------------------------------------------------
+; void intraPredAng(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
+;-----------------------------------------------------------------------------
+INIT_XMM ssse3
+cglobal intra_pred_ang16_2, 3,3,5      ; per x86inc convention: 3 args in r0-r2 (dst, dstStride, refLeft), 3 GPRs, 5 XMM (m0-m4)
+    cmp             r4m, byte 34       ; dirMode == 34 ?
+    cmove           r2, r3mp           ; yes: read from refAbove instead of refLeft (shared body for modes 2 and 34)
+    movu            m0, [r2 + 2]       ; m0 = p[0..15], with p = ref + 2
+    movu            m1, [r2 + 18]      ; m1 = p[16..31]
+    movu            [r0], m0           ; row 0 = p[0..15]; in general row k = p[k..k+15]
+    punpckhqdq      m2, m0, m0         ; m2 = {p[8..15], p[8..15]}
+    punpcklqdq      m2, m1             ; m2 = {p[8..15], p[16..23]} = p[8..23]
+    palignr         m3, m0, 1          ; m3.lo = p[1..8]; m3 not initialised yet, but only the low qword is consumed below
+    palignr         m4, m2, 1          ; m4.lo = p[9..16]; same remark for m4
+    punpcklqdq      m3, m4             ; m3 = p[1..16]
+    movu            [r0 + r1], m3      ; row 1
+    lea             r0, [r0 + r1 * 2]  ; dst += 2 rows
+    palignr         m3, m0, 2          ; m3.lo = p[2..9]
+    palignr         m4, m2, 2          ; m4.lo = p[10..17]
+    punpcklqdq      m3, m4             ; m3 = p[2..17]
+    movu            [r0], m3           ; row 2
+    palignr         m3, m0, 3          ; m3.lo = p[3..10]
+    palignr         m4, m2, 3          ; m4.lo = p[11..18]
+    punpcklqdq      m3, m4             ; m3 = p[3..18]
+    movu            [r0 + r1], m3      ; row 3
+    lea             r0, [r0 + r1 * 2]  ; dst += 2 rows
+    palignr         m3, m0, 4          ; m3.lo = p[4..11]
+    palignr         m4, m2, 4          ; m4.lo = p[12..19]
+    punpcklqdq      m3, m4             ; m3 = p[4..19]
+    movu            [r0], m3           ; row 4
+    palignr         m3, m0, 5          ; m3.lo = p[5..12]
+    palignr         m4, m2, 5          ; m4.lo = p[13..20]
+    punpcklqdq      m3, m4             ; m3 = p[5..20]
+    movu            [r0 + r1], m3      ; row 5
+    lea             r0, [r0 + r1 * 2]  ; dst += 2 rows
+    palignr         m3, m0, 6          ; m3.lo = p[6..13]
+    palignr         m4, m2, 6          ; m4.lo = p[14..21]
+    punpcklqdq      m3, m4             ; m3 = p[6..21]
+    movu            [r0], m3           ; row 6
+    palignr         m3, m0, 7          ; m3.lo = p[7..14]
+    palignr         m4, m2, 7          ; m4.lo = p[15..22]
+    punpcklqdq      m3, m4             ; m3 = p[7..22]; kept as the low-half source for rows 8..15
+    movu            [r0 + r1], m3      ; row 7
+    lea             r0, [r0 + r1 * 2]  ; dst += 2 rows
+    palignr         m0, m3, 1          ; m0.lo = m3[1..8] = p[8..15]; m0 is scratch from here on
+    punpcklqdq      m0, m1             ; m0 = {p[8..15], p[16..23]} = p[8..23]
+    movu            [r0], m0           ; row 8
+    palignr         m0, m3, 2          ; m0.lo = m3[2..9] = p[9..16]
+    palignr         m2, m1, 1          ; m2.lo = m1[1..8] = p[17..24]; m2 is scratch from here on
+    punpcklqdq      m0, m2             ; m0 = p[9..24]
+    movu            [r0 + r1], m0      ; row 9
+    lea             r0, [r0 + r1 * 2]  ; dst += 2 rows
+    palignr         m0, m3, 3          ; m0.lo = p[10..17]
+    palignr         m2, m1, 2          ; m2.lo = p[18..25]
+    punpcklqdq      m0, m2             ; m0 = p[10..25]
+    movu            [r0], m0           ; row 10
+    palignr         m0, m3, 4          ; m0.lo = p[11..18]
+    palignr         m2, m1, 3          ; m2.lo = p[19..26]
+    punpcklqdq      m0, m2             ; m0 = p[11..26]
+    movu            [r0 + r1], m0      ; row 11
+    lea             r0, [r0 + r1 * 2]  ; dst += 2 rows
+    palignr         m0, m3, 5          ; m0.lo = p[12..19]
+    palignr         m2, m1, 4          ; m2.lo = p[20..27]
+    punpcklqdq      m0, m2             ; m0 = p[12..27]
+    movu            [r0], m0           ; row 12
+    palignr         m0, m3, 6          ; m0.lo = p[13..20]
+    palignr         m2, m1, 5          ; m2.lo = p[21..28]
+    punpcklqdq      m0, m2             ; m0 = p[13..28]
+    movu            [r0 + r1], m0      ; row 13
+    lea             r0, [r0 + r1 * 2]  ; dst += 2 rows
+    palignr         m0, m3, 7          ; m0.lo = p[14..21]
+    palignr         m2, m1, 6          ; m2.lo = p[22..29]
+    punpcklqdq      m0, m2             ; m0 = p[14..29]
+    movu            [r0], m0           ; row 14
+    palignr         m0, m3, 8          ; m0.lo = m3[8..15] = p[15..22]
+    palignr         m2, m1, 7          ; m2.lo = m1[7..14] = p[23..30]
+    punpcklqdq      m0, m2             ; m0 = p[15..30]
+    movu            [r0 + r1], m0      ; row 15 (last)
+    RET
+
+;-----------------------------------------------------------------------------
 ; void all_angs_pred_4x4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma)
 ;-----------------------------------------------------------------------------
 INIT_XMM sse4


More information about the x265-devel mailing list