[x265] [PATCH] sse: fix overflow in sse_ss for 10 bit
Divya Manivannan
divya at multicorewareinc.com
Wed Sep 30 12:56:51 CEST 2015
# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1443607299 -19800
# Wed Sep 30 15:31:39 2015 +0530
# Node ID aadec6615a3d5f33b4fdb00079e236b019ef1e95
# Parent 6e7761bdfe23addb862483f8407b388800de7d92
sse: fix overflow in sse_ss for 10 bit
diff -r 6e7761bdfe23 -r aadec6615a3d source/common/common.h
--- a/source/common/common.h Wed Sep 30 14:57:15 2015 +0530
+++ b/source/common/common.h Wed Sep 30 15:31:39 2015 +0530
@@ -135,7 +135,7 @@
typedef int32_t ssum2_t; // Signed sum
#endif // if HIGH_BIT_DEPTH
-#if X265_DEPTH <= 10
+#if X265_DEPTH < 10
typedef uint32_t sse_ret_t;
#else
typedef uint64_t sse_ret_t;
diff -r 6e7761bdfe23 -r aadec6615a3d source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Sep 30 14:57:15 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Sep 30 15:31:39 2015 +0530
@@ -1006,10 +1006,11 @@
p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_4x8_mmx2);
p.chroma[X265_CSP_I422].cu[BLOCK_422_8x16].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_8x16_sse2);
p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_16x32_sse2);
-#if X265_DEPTH <= 10
- p.cu[BLOCK_4x4].sse_ss = PFX(pixel_ssd_ss_4x4_mmx2);
- ALL_LUMA_CU(sse_ss, pixel_ssd_ss, sse2);
-#endif
+
+ // sse_ss primitives need to be fixed for 10 and 12 bit
+ //p.cu[BLOCK_4x4].sse_ss = PFX(pixel_ssd_ss_4x4_mmx2);
+ //ALL_LUMA_CU(sse_ss, pixel_ssd_ss, sse2);
+
p.cu[BLOCK_4x4].dct = PFX(dct4_sse2);
p.cu[BLOCK_8x8].dct = PFX(dct8_sse2);
p.cu[BLOCK_4x4].idct = PFX(idct4_sse2);
@@ -1535,11 +1536,12 @@
p.cu[BLOCK_16x16].ssd_s = PFX(pixel_ssd_s_16_avx2);
p.cu[BLOCK_32x32].ssd_s = PFX(pixel_ssd_s_32_avx2);
+ // sse_ss primitives need to be fixed for 10 and 12 bit
+ //p.cu[BLOCK_16x16].sse_ss = PFX(pixel_ssd_ss_16x16_avx2);
+ //p.cu[BLOCK_32x32].sse_ss = PFX(pixel_ssd_ss_32x32_avx2);
+ //p.cu[BLOCK_64x64].sse_ss = PFX(pixel_ssd_ss_64x64_avx2);
+
#if X265_DEPTH <= 10
- p.cu[BLOCK_16x16].sse_ss = PFX(pixel_ssd_ss_16x16_avx2);
- p.cu[BLOCK_32x32].sse_ss = PFX(pixel_ssd_ss_32x32_avx2);
- p.cu[BLOCK_64x64].sse_ss = PFX(pixel_ssd_ss_64x64_avx2);
-
p.cu[BLOCK_16x16].sse_pp = PFX(pixel_ssd_16x16_avx2);
p.cu[BLOCK_32x32].sse_pp = PFX(pixel_ssd_32x32_avx2);
p.cu[BLOCK_64x64].sse_pp = PFX(pixel_ssd_64x64_avx2);
diff -r 6e7761bdfe23 -r aadec6615a3d source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Wed Sep 30 14:57:15 2015 +0530
+++ b/source/common/x86/pixel.h Wed Sep 30 15:31:39 2015 +0530
@@ -39,7 +39,7 @@
pixel PFX(planeClipAndMax_avx2)(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, const pixel minPix, const pixel maxPix);
#define DECL_PIXELS(cpu) \
- FUNCDEF_PU(uint32_t, pixel_ssd, cpu, const pixel*, intptr_t, const pixel*, intptr_t); \
+ FUNCDEF_PU(sse_ret_t, pixel_ssd, cpu, const pixel*, intptr_t, const pixel*, intptr_t); \
FUNCDEF_PU(int, pixel_sa8d, cpu, const pixel*, intptr_t, const pixel*, intptr_t); \
FUNCDEF_PU(void, pixel_sad_x3, cpu, const pixel*, const pixel*, const pixel*, const pixel*, intptr_t, int32_t*); \
FUNCDEF_PU(void, pixel_sad_x4, cpu, const pixel*, const pixel*, const pixel*, const pixel*, const pixel*, intptr_t, int32_t*); \
@@ -48,7 +48,7 @@
FUNCDEF_PU(void, pixel_sub_ps, cpu, int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1); \
FUNCDEF_CHROMA_PU(int, pixel_satd, cpu, const pixel*, intptr_t, const pixel*, intptr_t); \
FUNCDEF_CHROMA_PU(int, pixel_sad, cpu, const pixel*, intptr_t, const pixel*, intptr_t); \
- FUNCDEF_CHROMA_PU(uint32_t, pixel_ssd_ss, cpu, const int16_t*, intptr_t, const int16_t*, intptr_t); \
+ FUNCDEF_CHROMA_PU(sse_ret_t, pixel_ssd_ss, cpu, const int16_t*, intptr_t, const int16_t*, intptr_t); \
FUNCDEF_CHROMA_PU(void, addAvg, cpu, const int16_t*, const int16_t*, pixel*, intptr_t, intptr_t, intptr_t); \
FUNCDEF_CHROMA_PU(int, pixel_ssd_s, cpu, const int16_t*, intptr_t); \
FUNCDEF_TU_S(int, pixel_ssd_s, cpu, const int16_t*, intptr_t); \
diff -r 6e7761bdfe23 -r aadec6615a3d source/encoder/rdcost.h
--- a/source/encoder/rdcost.h Wed Sep 30 14:57:15 2015 +0530
+++ b/source/encoder/rdcost.h Wed Sep 30 15:31:39 2015 +0530
@@ -91,7 +91,7 @@
inline uint64_t calcRdCost(sse_ret_t distortion, uint32_t bits) const
{
-#if X265_DEPTH <= 10
+#if X265_DEPTH < 10
X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda2,
"calcRdCost wrap detected dist: %u, bits %u, lambda: " X265_LL "\n",
distortion, bits, m_lambda2);
@@ -130,7 +130,7 @@
inline sse_ret_t scaleChromaDist(uint32_t plane, sse_ret_t dist) const
{
-#if X265_DEPTH <= 10
+#if X265_DEPTH < 10
X265_CHECK(dist <= (UINT64_MAX - 128) / m_chromaDistWeight[plane - 1],
"scaleChromaDist wrap detected dist: %u, lambda: %u\n",
dist, m_chromaDistWeight[plane - 1]);
More information about the x265-devel mailing list