[x265] [PATCH] asm: sse version 10bit code for chroma_p2s, reuse luma code

rajesh at multicorewareinc.com rajesh at multicorewareinc.com
Thu Apr 16 08:19:18 CEST 2015


# HG changeset patch
# User Rajesh Paulraj<rajesh at multicorewareinc.com>
# Date 1429164423 -19800
#      Thu Apr 16 11:37:03 2015 +0530
# Node ID 9248bece15a1ec8439210c0e517fb3f3bc305080
# Parent  7dec3ef187cbf3c1fd5ebfdce1172dc577e11dfe
asm: SSE version of the 10-bit code for chroma_p2s, reusing the luma code

diff -r 7dec3ef187cb -r 9248bece15a1 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Apr 16 11:24:34 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Thu Apr 16 11:37:03 2015 +0530
@@ -975,6 +975,43 @@
         p.pu[LUMA_12x16].convert_p2s = x265_filterPixelToShort_12x16_ssse3;
         p.pu[LUMA_48x64].convert_p2s = x265_filterPixelToShort_48x64_ssse3;
 
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].p2s = x265_filterPixelToShort_4x4_ssse3;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].p2s = x265_filterPixelToShort_4x8_ssse3;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].p2s = x265_filterPixelToShort_4x16_ssse3;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].p2s = x265_filterPixelToShort_8x4_ssse3;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].p2s = x265_filterPixelToShort_8x8_ssse3;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].p2s = x265_filterPixelToShort_8x16_ssse3;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].p2s = x265_filterPixelToShort_8x32_ssse3;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].p2s = x265_filterPixelToShort_16x4_ssse3;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].p2s = x265_filterPixelToShort_16x8_ssse3;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].p2s = x265_filterPixelToShort_16x12_ssse3;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].p2s = x265_filterPixelToShort_16x16_ssse3;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].p2s = x265_filterPixelToShort_16x32_ssse3;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].p2s = x265_filterPixelToShort_32x8_ssse3;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].p2s = x265_filterPixelToShort_32x16_ssse3;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].p2s = x265_filterPixelToShort_32x24_ssse3;
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].p2s = x265_filterPixelToShort_32x32_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].p2s = x265_filterPixelToShort_4x4_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].p2s = x265_filterPixelToShort_4x8_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].p2s = x265_filterPixelToShort_4x16_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].p2s = x265_filterPixelToShort_4x32_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].p2s = x265_filterPixelToShort_8x4_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].p2s = x265_filterPixelToShort_8x8_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].p2s = x265_filterPixelToShort_8x12_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].p2s = x265_filterPixelToShort_8x16_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].p2s = x265_filterPixelToShort_8x32_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].p2s = x265_filterPixelToShort_8x64_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].p2s = x265_filterPixelToShort_12x32_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].p2s = x265_filterPixelToShort_16x8_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].p2s = x265_filterPixelToShort_16x16_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].p2s = x265_filterPixelToShort_16x24_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].p2s = x265_filterPixelToShort_16x32_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].p2s = x265_filterPixelToShort_16x64_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].p2s = x265_filterPixelToShort_24x64_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].p2s = x265_filterPixelToShort_32x16_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].p2s = x265_filterPixelToShort_32x32_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].p2s = x265_filterPixelToShort_32x48_ssse3;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].p2s = x265_filterPixelToShort_32x64_ssse3;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].p2s = x265_filterPixelToShort_4x2_ssse3;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].p2s = x265_filterPixelToShort_8x2_ssse3;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].p2s = x265_filterPixelToShort_8x6_ssse3;
@@ -1021,6 +1058,7 @@
         p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].p2s = x265_filterPixelToShort_2x4_sse4;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_2x8].p2s = x265_filterPixelToShort_2x8_sse4;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].p2s = x265_filterPixelToShort_6x8_sse4;
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_2x8].p2s = x265_filterPixelToShort_2x8_sse4;
         p.chroma[X265_CSP_I422].pu[CHROMA_422_2x16].p2s = x265_filterPixelToShort_2x16_sse4;
         p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].p2s = x265_filterPixelToShort_6x16_sse4;
     }
diff -r 7dec3ef187cb -r 9248bece15a1 source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h	Thu Apr 16 11:24:34 2015 +0530
+++ b/source/common/x86/ipfilter8.h	Thu Apr 16 11:37:03 2015 +0530
@@ -328,7 +328,18 @@
     SETUP_CHROMA_P2S_FUNC_DEF(2, 8, cpu); \
     SETUP_CHROMA_P2S_FUNC_DEF(6, 8, cpu);
 
+#define CHROMA_422_P2S_FILTERS_SSSE3(cpu) /* one SETUP_CHROMA_P2S_FUNC_DEF(W, H, cpu) per extra block size used by the I422 p2s table; every entry is ';'-terminated so the list does not depend on the helper macro supplying its own terminator */ \
+    SETUP_CHROMA_P2S_FUNC_DEF(4, 32, cpu); \
+    SETUP_CHROMA_P2S_FUNC_DEF(8, 12, cpu); \
+    SETUP_CHROMA_P2S_FUNC_DEF(8, 64, cpu); \
+    SETUP_CHROMA_P2S_FUNC_DEF(12, 32, cpu); \
+    SETUP_CHROMA_P2S_FUNC_DEF(16, 24, cpu); \
+    SETUP_CHROMA_P2S_FUNC_DEF(16, 64, cpu); \
+    SETUP_CHROMA_P2S_FUNC_DEF(24, 64, cpu); \
+    SETUP_CHROMA_P2S_FUNC_DEF(32, 48, cpu);
+
 #define CHROMA_422_P2S_FILTERS_SSE4(cpu) \
+    SETUP_CHROMA_P2S_FUNC_DEF(2, 8, cpu); /* pairs with the CHROMA_422_2x8 p2s table entry in asm-primitives.cpp */ \
     SETUP_CHROMA_P2S_FUNC_DEF(2, 16, cpu) \
     SETUP_CHROMA_P2S_FUNC_DEF(6, 16, cpu);
 
@@ -342,6 +353,7 @@
 CHROMA_422_HORIZ_FILTERS(_sse4);
 CHROMA_422_VERT_FILTERS_SSE4(_sse4);
 CHROMA_422_P2S_FILTERS_SSE4(_sse4);
+CHROMA_422_P2S_FILTERS_SSSE3(_ssse3);
 
 CHROMA_444_VERT_FILTERS(_sse2);
 CHROMA_444_HORIZ_FILTERS(_sse4);


More information about the x265-devel mailing list