[x265] [PATCH] asm: 10bpp code for enabling ssim_end_4

Murugan Vairavel <murugan at multicorewareinc.com>
Wed Dec 4 12:36:34 CET 2013


# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1386156966 -19800
#      Wed Dec 04 17:06:06 2013 +0530
# Node ID e0b3519259050451f272ba6cd3fa8cef77f5119f
# Parent  e1e18d9cd5b0fa7d14c655819bd347a5c8accbde
asm: 10bpp code for enabling ssim_end_4

diff -r e1e18d9cd5b0 -r e0b351925905 source/common/pixel.cpp
--- a/source/common/pixel.cpp	Wed Dec 04 12:53:19 2013 +0530
+++ b/source/common/pixel.cpp	Wed Dec 04 17:06:06 2013 +0530
@@ -651,12 +651,15 @@
 /* Maximum value for 10-bit is: ss*64 = (2^10-1)^2*16*4*64 = 4286582784, which will overflow in some cases.
  * s1*s1, s2*s2, and s1*s2 also obtain this value for edge cases: ((2^10-1)*16*4)^2 = 4286582784.
  * Maximum value for 9-bit is: ss*64 = (2^9-1)^2*16*4*64 = 1069551616, which will not overflow. */
+
 #define PIXEL_MAX ((1 << X265_DEPTH) - 1)
 #if HIGH_BIT_DEPTH
+assert(X265_DEPTH == 10);
 #define type float
     static const float ssim_c1 = (float)(.01 * .01 * PIXEL_MAX * PIXEL_MAX * 64);
     static const float ssim_c2 = (float)(.03 * .03 * PIXEL_MAX * PIXEL_MAX * 64 * 63);
 #else
+assert(X265_DEPTH == 8);
 #define type int
     static const int ssim_c1 = (int)(.01 * .01 * PIXEL_MAX * PIXEL_MAX * 64 + .5);
     static const int ssim_c2 = (int)(.03 * .03 * PIXEL_MAX * PIXEL_MAX * 64 * 63 + .5);
diff -r e1e18d9cd5b0 -r e0b351925905 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Dec 04 12:53:19 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Dec 04 17:06:06 2013 +0530
@@ -533,6 +533,7 @@
         p.transpose[BLOCK_64x64] = x265_transpose64_sse2;
 
         p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_sse2;
+        p.ssim_end_4 = x265_pixel_ssim_end4_sse2;
         PIXEL_AVG(sse2);
         PIXEL_AVG_W4(mmx2);
         LUMA_VAR(_sse2);


More information about the x265-devel mailing list