[x265] [PATCH] asm: enable asm routines that support 16bpp in HIGH_BIT_DEPTH builds

yuvaraj at multicorewareinc.com yuvaraj at multicorewareinc.com
Thu Nov 28 14:48:03 CET 2013


# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1385646443 -19800
#      Thu Nov 28 19:17:23 2013 +0530
# Node ID 3a916cf2261b87ea9dc01f20dd9014c45e3ce7d3
# Parent  949f85337789c8d00f39ed1a010990efe67ebcf4
asm: enable asm routines that support 16bpp in HIGH_BIT_DEPTH builds

diff -r 949f85337789 -r 3a916cf2261b source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Nov 27 18:10:14 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp	Thu Nov 28 19:17:23 2013 +0530
@@ -456,7 +456,37 @@
 void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
 {
 #if HIGH_BIT_DEPTH
-    if (cpuMask & X265_CPU_SSE2) p.sa8d[0] = p.sa8d[0];
+    if (cpuMask & X265_CPU_SSE2) 
+    {
+        p.sa8d[0] = p.sa8d[0];
+        INIT6(satd, _sse2);
+        p.satd[LUMA_4x16] = x265_pixel_satd_4x16_sse2;
+        p.satd[LUMA_8x32] = x265_pixel_satd_8x32_sse2;
+        p.satd[LUMA_16x4] = x265_pixel_satd_16x4_sse2;
+        p.satd[LUMA_16x12] = x265_pixel_satd_16x12_sse2;
+        p.satd[LUMA_16x32] = x265_pixel_satd_16x32_sse2;
+        p.satd[LUMA_16x64] = x265_pixel_satd_16x64_sse2;
+
+        p.sa8d_inter[LUMA_8x8]  = x265_pixel_sa8d_8x8_sse2;
+        p.sa8d_inter[LUMA_16x16]  = x265_pixel_sa8d_16x16_sse2; 
+
+        p.sse_ss[LUMA_4x4]   = x265_pixel_ssd_ss_4x4_mmx2;
+        p.sse_ss[LUMA_4x8]   = x265_pixel_ssd_ss_4x8_mmx2;
+        p.sse_ss[LUMA_4x16]   = x265_pixel_ssd_ss_4x16_mmx2;
+        p.sse_ss[LUMA_8x4]   = x265_pixel_ssd_ss_8x4_sse2;
+        p.sse_ss[LUMA_8x8]   = x265_pixel_ssd_ss_8x8_sse2;
+        p.sse_ss[LUMA_8x16]   = x265_pixel_ssd_ss_8x16_sse2;
+        p.sse_ss[LUMA_16x8]   = x265_pixel_ssd_ss_16x8_sse2;
+        p.sse_ss[LUMA_16x16]   = x265_pixel_ssd_ss_16x16_sse2;
+
+        p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_sse2;
+        PIXEL_AVG_W4(sse2);
+        p.pixelavg_pp[LUMA_8x16]  = x265_pixel_avg_8x16_sse2;
+        p.pixelavg_pp[LUMA_8x8]   = x265_pixel_avg_8x8_sse2;
+        p.pixelavg_pp[LUMA_8x4]   = x265_pixel_avg_8x4_sse2;
+        p.pixelavg_pp[LUMA_16x16] = x265_pixel_avg_16x16_sse2;
+        p.pixelavg_pp[LUMA_16x8]  = x265_pixel_avg_16x8_sse2;
+    }
 #else
     if (cpuMask & X265_CPU_SSE2)
     {
diff -r 949f85337789 -r 3a916cf2261b source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Wed Nov 27 18:10:14 2013 -0600
+++ b/source/common/x86/pixel-a.asm	Thu Nov 28 19:17:23 2013 +0530
@@ -111,7 +111,7 @@
 ; int pixel_ssd_WxH( uint16_t *, intptr_t, uint16_t *, intptr_t )
 ;-----------------------------------------------------------------------------
 %macro SSD_ONE 2
-cglobal pixel_ssd_%1x%2, 4,7,6
+cglobal pixel_ssd_ss_%1x%2, 4,7,6
     FIX_STRIDES r1, r3
 %if mmsize == %1*2
     %define offset0_1 r1


More information about the x265-devel mailing list