[x265] [PATCH] asm: 10bpp code for enabling ssim_end_4
murugan at multicorewareinc.com
murugan at multicorewareinc.com
Wed Dec 4 12:36:34 CET 2013
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1386156966 -19800
# Wed Dec 04 17:06:06 2013 +0530
# Node ID e0b3519259050451f272ba6cd3fa8cef77f5119f
# Parent e1e18d9cd5b0fa7d14c655819bd347a5c8accbde
asm: 10bpp code for enabling ssim_end_4
diff -r e1e18d9cd5b0 -r e0b351925905 source/common/pixel.cpp
--- a/source/common/pixel.cpp Wed Dec 04 12:53:19 2013 +0530
+++ b/source/common/pixel.cpp Wed Dec 04 17:06:06 2013 +0530
@@ -651,12 +651,15 @@
/* Maximum value for 10-bit is: ss*64 = (2^10-1)^2*16*4*64 = 4286582784, which will overflow in some cases.
* s1*s1, s2*s2, and s1*s2 also obtain this value for edge cases: ((2^10-1)*16*4)^2 = 4286582784.
* Maximum value for 9-bit is: ss*64 = (2^9-1)^2*16*4*64 = 1069551616, which will not overflow. */
+
#define PIXEL_MAX ((1 << X265_DEPTH) - 1)
#if HIGH_BIT_DEPTH
+assert(X265_DEPTH == 10);
#define type float
static const float ssim_c1 = (float)(.01 * .01 * PIXEL_MAX * PIXEL_MAX * 64);
static const float ssim_c2 = (float)(.03 * .03 * PIXEL_MAX * PIXEL_MAX * 64 * 63);
#else
+assert(X265_DEPTH == 8);
#define type int
static const int ssim_c1 = (int)(.01 * .01 * PIXEL_MAX * PIXEL_MAX * 64 + .5);
static const int ssim_c2 = (int)(.03 * .03 * PIXEL_MAX * PIXEL_MAX * 64 * 63 + .5);
diff -r e1e18d9cd5b0 -r e0b351925905 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Dec 04 12:53:19 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Dec 04 17:06:06 2013 +0530
@@ -533,6 +533,7 @@
p.transpose[BLOCK_64x64] = x265_transpose64_sse2;
p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_sse2;
+ p.ssim_end_4 = x265_pixel_ssim_end4_sse2;
PIXEL_AVG(sse2);
PIXEL_AVG_W4(mmx2);
LUMA_VAR(_sse2);
More information about the x265-devel mailing list