[x265] [PATCH 1 of 3] main10: fix dct and MC primitives to correctly respect larger pixel sizes

Tue Nov 5 22:41:33 CET 2013

# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1383684166 21600
#      Tue Nov 05 14:42:46 2013 -0600
# Node ID 4c81c660b25131fb6c2eb25148c5a4137276ff06
# Parent  9b7ca5a14605dbf957e4816959121646917061ef
main10: fix dct and MC primitives to correctly respect larger pixel sizes

diff -r 9b7ca5a14605 -r 4c81c660b251 source/common/dct.cpp

--- a/source/common/dct.cpp	Tue Nov 05 14:15:49 2013 -0600
+++ b/source/common/dct.cpp	Tue Nov 05 14:42:46 2013 -0600
@@ -443,7 +443,7 @@
 
 void dst4_c(int16_t *src, int32_t *dst, intptr_t stride)
 {
-    const int shift_1st = 1;
+    const int shift_1st = 1 + X265_DEPTH - 8;
     const int shift_2nd = 8;
 
     ALIGN_VAR_32(int16_t, coef[4 * 4]);
@@ -471,7 +471,7 @@
 
 void dct4_c(int16_t *src, int32_t *dst, intptr_t stride)
 {
-    const int shift_1st = 1;
+    const int shift_1st = 1 + X265_DEPTH - 8;
     const int shift_2nd = 8;
 
     ALIGN_VAR_32(int16_t, coef[4 * 4]);
@@ -498,7 +498,7 @@
 
 void dct8_c(int16_t *src, int32_t *dst, intptr_t stride)
 {
-    const int shift_1st = 2;
+    const int shift_1st = 2 + X265_DEPTH - 8;
     const int shift_2nd = 9;
 
     ALIGN_VAR_32(int16_t, coef[8 * 8]);
@@ -526,7 +526,7 @@
 
 void dct16_c(int16_t *src, int32_t *dst, intptr_t stride)
 {
-    const int shift_1st = 3;
+    const int shift_1st = 3 + X265_DEPTH - 8;
     const int shift_2nd = 10;
 
     ALIGN_VAR_32(int16_t, coef[16 * 16]);
@@ -554,7 +554,7 @@
 
 void dct32_c(int16_t *src, int32_t *dst, intptr_t stride)
 {
-    const int shift_1st = 4;
+    const int shift_1st = 4 + X265_DEPTH - 8;
     const int shift_2nd = 11;
 
     ALIGN_VAR_32(int16_t, coef[32 * 32]);
@@ -583,7 +583,7 @@
 void idst4_c(int32_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 7;
-    const int shift_2nd = 12;
+    const int shift_2nd = 12 - (X265_DEPTH - 8);
 
     ALIGN_VAR_32(int16_t, coef[4 * 4]);
     ALIGN_VAR_32(int16_t, block[4 * 4]);
@@ -611,7 +611,7 @@
 void idct4_c(int32_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 7;
-    const int shift_2nd = 12;
+    const int shift_2nd = 12 - (X265_DEPTH - 8);
 
     ALIGN_VAR_32(int16_t, coef[4 * 4]);
     ALIGN_VAR_32(int16_t, block[4 * 4]);
@@ -639,7 +639,7 @@
 void idct8_c(int32_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 7;
-    const int shift_2nd = 12;
+    const int shift_2nd = 12 - (X265_DEPTH - 8);
 
     ALIGN_VAR_32(int16_t, coef[8 * 8]);
     ALIGN_VAR_32(int16_t, block[8 * 8]);
@@ -666,7 +666,7 @@
 void idct16_c(int32_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 7;
-    const int shift_2nd = 12;
+    const int shift_2nd = 12 - (X265_DEPTH - 8);
 
     ALIGN_VAR_32(int16_t, coef[16 * 16]);
     ALIGN_VAR_32(int16_t, block[16 * 16]);
@@ -693,7 +693,7 @@
 void idct32_c(int32_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 7;
-    const int shift_2nd = 12;
+    const int shift_2nd = 12 - (X265_DEPTH - 8);
 
     ALIGN_VAR_32(int16_t, coef[32 * 32]);
     ALIGN_VAR_32(int16_t, block[32 * 32]);
diff -r 9b7ca5a14605 -r 4c81c660b251 source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp	Tue Nov 05 14:15:49 2013 -0600
+++ b/source/common/ipfilter.cpp	Tue Nov 05 14:42:46 2013 -0600
@@ -82,7 +82,7 @@
 template<int N>
 void filterHorizontal_pp_c(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, int16_t const *coeff)
 {
-    int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
+    int headRoom = IF_FILTER_PREC;
     int offset =  (1 << (headRoom - 1));
     uint16_t maxVal = (1 << X265_DEPTH) - 1;
     const int cStride = 1;