[x265] [PATCH] added 24x32 partition size asm code to chroma function
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Fri Oct 18 13:37:47 CEST 2013
# HG changeset patch
# User Praveen Tiwari
# Date 1382096256 -19800
# Node ID 4ca261df8acbef14b9a09676fef9fa354c35e19c
# Parent 0efb3f85325f03edb436b260ba28189b8eae6b3f
added 24x32 partition size asm code to chroma function
diff -r 0efb3f85325f -r 4ca261df8acb source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Oct 18 16:18:12 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Fri Oct 18 17:07:36 2013 +0530
@@ -150,10 +150,10 @@
SETUP_CHROMA_FUNC_DEF(32, 16, cpu); \
SETUP_CHROMA_FUNC_DEF(16, 32, cpu); \
SETUP_CHROMA_FUNC_DEF(32, 24, cpu); \
+ SETUP_CHROMA_FUNC_DEF(24, 32, cpu); \
SETUP_CHROMA_FUNC_DEF(32, 8, cpu); \
- SETUP_CHROMA_FUNC_DEF(8, 32, cpu)
+ SETUP_CHROMA_FUNC_DEF(8, 32, cpu);
-//SETUP_CHROMA_FUNC_DEF(24, 32, cpu); /* 24x32 has not yet been implemented */
#define SETUP_LUMA_FUNC_DEF(W, H, cpu) \
p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu
diff -r 0efb3f85325f -r 4ca261df8acb source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Fri Oct 18 16:18:12 2013 +0530
+++ b/source/common/x86/ipfilter8.asm Fri Oct 18 17:07:36 2013 +0530
@@ -345,6 +345,34 @@
movu [dstq], %2
%endmacro
+%macro FILTER_H4_w24 4 ; filter one row, writing 24 bytes to dstq; %1 = scratch, %2 = result, %3 = pmulhrsw multiplier, %4 = second accumulator
+ movu %1, [srcq - 1] ; unaligned load starting one byte before srcq
+ pshufb %2, %1, Tm0 ; %2 = bytes of %1 rearranged by mask Tm0 (defined elsewhere; presumably gathers tap windows - confirm)
+ pmaddubsw %2, coef2 ; multiply bytes against coef2 and add adjacent pairs into words (coef2 defined elsewhere; presumably the filter taps)
+ pshufb %1, %1, Tm1 ; same source bytes rearranged by the second mask Tm1
+ pmaddubsw %1, coef2
+ phaddw %2, %1 ; horizontally add adjacent words of %2 and %1 into %2
+ movu %1, [srcq - 1 + 8] ; second load, 8 bytes further along the row
+ pshufb %4, %1, Tm0
+ pmaddubsw %4, coef2
+ pshufb %1, %1, Tm1
+ pmaddubsw %1, coef2
+ phaddw %4, %1 ; second group of word sums accumulates in %4
+ pmulhrsw %2, %3 ; rounded high-half multiply by %3 scales the word sums
+ pmulhrsw %4, %3
+ packuswb %2, %4 ; saturate both word groups to unsigned bytes: 16 bytes in %2
+ movu [dstq], %2 ; store output bytes 0..15
+ movu %1, [srcq - 1 + 16] ; third load, 16 bytes along the row; %2 is free to reuse after the store
+ pshufb %2, %1, Tm0
+ pmaddubsw %2, coef2
+ pshufb %1, %1, Tm1
+ pmaddubsw %1, coef2
+ phaddw %2, %1
+ pmulhrsw %2, %3
+ packuswb %2, %2 ; pack with itself; only the low 8 bytes are used below
+ movh [dstq + 16], %2 ; store output bytes 16..23 (8 bytes), completing the 24-byte row
+%endmacro
+
%macro FILTER_H4_w32 4
movu %1, [srcq - 1]
pshufb %2, %1, Tm0
@@ -479,6 +507,7 @@
IPFILTER_CHROMA_W 32, 8
IPFILTER_CHROMA_W 32, 16
IPFILTER_CHROMA_W 32, 24
+IPFILTER_CHROMA_W 24, 32
IPFILTER_CHROMA_W 32, 32
More information about the x265-devel
mailing list