[x265] [PATCH] added 24x32 partition size asm code to chroma function

praveen at multicorewareinc.com praveen at multicorewareinc.com
Fri Oct 18 13:37:47 CEST 2013


# HG changeset patch
# User Praveen Tiwari
# Date 1382096256 -19800
# Node ID 4ca261df8acbef14b9a09676fef9fa354c35e19c
# Parent  0efb3f85325f03edb436b260ba28189b8eae6b3f
added 24x32 partition size asm code to chroma function

diff -r 0efb3f85325f -r 4ca261df8acb source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Fri Oct 18 16:18:12 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Oct 18 17:07:36 2013 +0530
@@ -150,10 +150,10 @@
     SETUP_CHROMA_FUNC_DEF(32, 16, cpu); \
     SETUP_CHROMA_FUNC_DEF(16, 32, cpu); \
     SETUP_CHROMA_FUNC_DEF(32, 24, cpu); \
+    SETUP_CHROMA_FUNC_DEF(24, 32, cpu); \
     SETUP_CHROMA_FUNC_DEF(32, 8, cpu); \
-    SETUP_CHROMA_FUNC_DEF(8, 32, cpu)
+    SETUP_CHROMA_FUNC_DEF(8, 32, cpu);
 
-//SETUP_CHROMA_FUNC_DEF(24, 32, cpu);  /* 24x32 has not yet been implemented */
 
 #define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
     p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu
diff -r 0efb3f85325f -r 4ca261df8acb source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Fri Oct 18 16:18:12 2013 +0530
+++ b/source/common/x86/ipfilter8.asm	Fri Oct 18 17:07:36 2013 +0530
@@ -345,6 +345,34 @@
     movu        [dstq],      %2
 %endmacro
 
+%macro FILTER_H4_w24 4 ; filter one 24-wide row: reads from srcq-1, writes 16+8 bytes at dstq (assuming XMM regs); %1,%2,%4 = scratch, %3 = pmulhrsw multiplier
+    movu        %1, [srcq - 1] ; unaligned load starting one byte before srcq
+    pshufb      %2, %1, Tm0 ; rearrange source bytes with mask Tm0 (defined outside this hunk)
+    pmaddubsw   %2, coef2 ; multiply bytes by coef2 and sum adjacent pairs into words
+    pshufb      %1, %1, Tm1 ; same source bytes rearranged with mask Tm1
+    pmaddubsw   %1, coef2 ; second set of pair sums
+    phaddw      %2, %1 ; pairwise-add the words of %2 and %1 into %2
+    movu        %1, [srcq - 1 + 8] ; second load, 8 bytes further along the row
+    pshufb      %4, %1, Tm0 ; repeat the same sequence, accumulating in %4
+    pmaddubsw   %4, coef2
+    pshufb      %1, %1, Tm1
+    pmaddubsw   %1, coef2
+    phaddw      %4, %1 ; %4 = word sums for the second group
+    pmulhrsw    %2, %3 ; rounded high-half multiply by %3 (presumably the round/scale constant - confirm at call site)
+    pmulhrsw    %4, %3 ; same scaling for the second group
+    packuswb    %2, %4 ; saturate words to unsigned bytes: %2 -> low half, %4 -> high half
+    movu        [dstq],          %2 ; store the first packed result register (16 bytes for XMM)
+    movu        %1, [srcq - 1 + 16] ; third load, 16 bytes further along the row
+    pshufb      %2, %1, Tm0 ; %2 is free again after the store above
+    pmaddubsw   %2, coef2
+    pshufb      %1, %1, Tm1
+    pmaddubsw   %1, coef2
+    phaddw      %2, %1 ; %2 = word sums for the third group
+    pmulhrsw    %2, %3 ; same rounding/scaling as above
+    packuswb    %2, %2 ; pack to bytes; both halves receive the same results
+    movh        [dstq + 16],     %2 ; store only the low half at dstq+16 (movh is presumably the x86inc half-register move - confirm)
+%endmacro
+
 %macro FILTER_H4_w32 4
     movu        %1, [srcq - 1]
     pshufb      %2, %1, Tm0
@@ -479,6 +507,7 @@
 IPFILTER_CHROMA_W 32, 8
 IPFILTER_CHROMA_W 32, 16
 IPFILTER_CHROMA_W 32, 24
+IPFILTER_CHROMA_W 24, 32
 IPFILTER_CHROMA_W 32, 32
 
 


More information about the x265-devel mailing list